4 * LZX compression routines
8 * Copyright (C) 2012, 2013 Eric Biggers
10 * This file is part of wimlib, a library for working with WIM files.
12 * wimlib is free software; you can redistribute it and/or modify it under the
13 * terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 3 of the License, or (at your option)
17 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19 * A PARTICULAR PURPOSE. See the GNU General Public License for more
22 * You should have received a copy of the GNU General Public License
23 * along with wimlib; if not, see http://www.gnu.org/licenses/.
28 * This file contains a compressor for the LZX compression format, as used in
29 * the WIM file format.
34 * First, the primary reference for the LZX compression format is the
35 * specification released by Microsoft.
37 * Second, the comments in lzx-decompress.c provide some more information about
38 * the LZX compression format, including errors in the Microsoft specification.
40 * Do note that LZX shares many similarities with DEFLATE, the algorithm used by
41 * zlib and gzip. Both LZX and DEFLATE use LZ77 matching and Huffman coding,
42 * and certain other details are quite similar, such as the method for storing
43 * Huffman codes. However, some of the main differences are:
45 * - LZX preprocesses the data before attempting to compress it.
46 * - LZX uses a "main" alphabet which combines literals and matches, with the
47 * match symbols containing a "length header" (giving all or part of the match
48 * length) and a "position footer" (giving, roughly speaking, the order of
49 * magnitude of the match offset).
50 * - LZX does not have static Huffman blocks; however it does have two types of
51 * dynamic Huffman blocks ("aligned offset" and "verbatim").
52 * - LZX has a minimum match length of 2 rather than 3.
 * - In LZX, match offsets 0 through 2 actually represent entries in an LRU queue
59 * There are actually two distinct overall algorithms implemented here. We
60 * shall refer to them as the "slow" algorithm and the "fast" algorithm. The
61 * "slow" algorithm spends more time compressing to achieve a higher compression
62 * ratio compared to the "fast" algorithm. More details are presented below.
67 * The "slow" algorithm to generate LZX-compressed data is roughly as follows:
69 * 1. Preprocess the input data to translate the targets of x86 call instructions
70 * to absolute offsets.
72 * 2. Determine the best known sequence of LZ77 matches ((offset, length) pairs)
73 * and literal bytes to divide the input into. Raw match-finding is done
74 * using a very clever binary tree search based on the "Bt3" algorithm from
75 * 7-Zip. Parsing, or match-choosing, is solved essentially as a
76 * minimum-cost path problem, but using a heuristic forward search based on
77 * the Deflate encoder from 7-Zip rather than a more intuitive backward
78 * search, the latter of which would naively require that all matches be
79 * found. This heuristic search, as well as other heuristics such as limits
80 * on the matches considered, considerably speed up this part of the
81 * algorithm, which is the main bottleneck. Finally, after matches and
 *    literals are chosen, the Huffman codes needed to output them are
85 * 3. Up to a certain number of iterations, use the resulting Huffman codes to
86 * refine a cost model and go back to Step #2 to determine an improved
87 * sequence of matches and literals.
89 * 4. Up to a certain depth, try splitting the current block to see if the
90 * compression ratio can be improved. This may be the case if parts of the
91 * input differ greatly from each other and could benefit from different
94 * 5. Output the resulting block(s) using the match/literal sequences and the
95 * Huffman codes that were computed for each block.
100 * The fast algorithm (and the only one available in wimlib v1.5.1 and earlier)
101 * spends much less time on the main bottlenecks of the compression process ---
102 * that is the match finding, match choosing, and block splitting. Matches are
103 * found and chosen with hash chains using a greedy parse with one position of
104 * look-ahead. No block splitting is done; only compressing the full input into
105 * an aligned offset block is considered.
110 * The old API (retained for backward compatibility) consists of just one function:
112 * wimlib_lzx_compress()
114 * The new compressor has more potential parameters and needs more memory, so
115 * the new API ties up memory allocations and compression parameters into a
118 * wimlib_lzx_alloc_context()
119 * wimlib_lzx_compress2()
120 * wimlib_lzx_free_context()
122 * Both wimlib_lzx_compress() and wimlib_lzx_compress2() are designed to
123 * compress an in-memory buffer of up to 32768 bytes. There is no sliding
124 * window. This is suitable for the WIM format, which uses fixed-size chunks
125 * that are seemingly always 32768 bytes. If needed, the compressor potentially
126 * could be extended to support a larger and/or sliding window.
128 * Both wimlib_lzx_compress() and wimlib_lzx_compress2() return 0 if the data
129 * could not be compressed to less than the size of the uncompressed data.
130 * Again, this is suitable for the WIM format, which stores such data chunks
133 * The functions in this API are exported from the library, although this is
134 * only in case other programs happen to have uses for it other than WIM
135 * reading/writing as already handled through the rest of the library.
140 * Acknowledgments to several other open-source projects that made it possible
141 * to implement this code:
143 * - 7-Zip (author: Igor Pavlov), for the binary tree match-finding
144 * algorithm, the heuristic near-optimal forward match-choosing
145 * algorithm, and the block splitting algorithm.
147 * - zlib (author: Jean-loup Gailly and Mark Adler), for the hash table
148 * match-finding algorithm.
150 * - lzx-compress (author: Matthew T. Russotto), on which some parts of this
151 * code were originally based.
159 #include "wimlib/compress.h"
160 #include "wimlib/error.h"
161 #include "wimlib/lzx.h"
162 #include "wimlib/util.h"
164 #ifdef ENABLE_LZX_DEBUG
165 # include <wimlib/decompress.h>
/* Experimental parameters not exposed through the API */

/* Size of the 'optimum' array used by the near-optimal match-choosing
 * algorithm (see 'struct lzx_compressor.optimum') --- presumably in array
 * entries; confirm at the allocation site. */
#define LZX_PARAM_OPTIM_ARRAY_SIZE 1024

/* Nonzero to make the match-chooser track the repeat-offset LRU queue state
 * per window position (enables the 'queue' member of 'struct lzx_optimal'). */
#define LZX_PARAM_ACCOUNT_FOR_LRU 1

/* NOTE(review): presumably disables the heuristic that skips some match
 * positions --- use site not visible here; confirm. */
#define LZX_PARAM_DONT_SKIP_MATCHES 0

/* NOTE(review): presumably seeds the cost model with empirical defaults
 * rather than a flat model --- use site not visible here; confirm. */
#define LZX_PARAM_USE_EMPIRICAL_DEFAULT_COSTS 1

/* Currently, this constant can't simply be changed because the code currently
 * uses a static number of position slots. */
#define LZX_MAX_WINDOW_SIZE 32768

/* This may be WIM-specific */
#define LZX_DEFAULT_BLOCK_SIZE 32768

/* Parameters of the 3-byte hash used by the match-finder: a
 * LZX_LZ_HASH_BITS-bit hash built by shifting each byte in by
 * LZX_LZ_HASH_SHIFT bits (see lzx_lz_compute_hash()). */
#define LZX_LZ_HASH_BITS 15
#define LZX_LZ_HASH_SIZE (1 << LZX_LZ_HASH_BITS)
#define LZX_LZ_HASH_MASK (LZX_LZ_HASH_SIZE - 1)
#define LZX_LZ_HASH_SHIFT 5
/* Codewords for the LZX main, length, and aligned offset Huffman codes */
struct lzx_codewords {
	u16 main[LZX_MAINTREE_NUM_SYMBOLS];       /* literal/match-header alphabet */
	u16 len[LZX_LENTREE_NUM_SYMBOLS];         /* length-footer alphabet */
	u16 aligned[LZX_ALIGNEDTREE_NUM_SYMBOLS]; /* aligned-offset alphabet */
/* Lengths for the LZX main, length, and aligned offset Huffman codes */

	/* Codeword lengths in bits, indexed by symbol.  Main and length
	 * codewords may be up to LZX_MAX_CODEWORD_LEN bits; aligned-offset
	 * codewords up to 8 bits (see lzx_assert_codes_valid()). */
	u8 main[LZX_MAINTREE_NUM_SYMBOLS];
	u8 len[LZX_LENTREE_NUM_SYMBOLS];
	u8 aligned[LZX_ALIGNEDTREE_NUM_SYMBOLS];
/* The LZX main, length, and aligned offset Huffman codes */

	/* Codewords and their lengths, bundled so a complete code set can be
	 * copied or passed around as one unit (e.g. per-block in
	 * 'struct lzx_block_spec'). */
	struct lzx_codewords codewords;
	struct lzx_lens lens;
/* Tables for tallying symbol frequencies in the three LZX alphabets;
 * input to lzx_make_huffman_codes(). */

	freq_t main[LZX_MAINTREE_NUM_SYMBOLS];
	freq_t len[LZX_LENTREE_NUM_SYMBOLS];
	freq_t aligned[LZX_ALIGNEDTREE_NUM_SYMBOLS];
215 /* LZX intermediate match/literal format */
219 * 31 1 if a match, 0 if a literal.
221 * 30-25 position slot. This can be at most 50, so it will fit in 6
224 * 8-24 position footer. This is the offset of the real formatted
225 * offset from the position base. This can be at most 17 bits
226 * (since lzx_extra_bits[LZX_NUM_POSITION_SLOTS - 1] is 17).
228 * 0-7 length of match, minus 2. This can be at most
229 * (LZX_MAX_MATCH - 2) == 255, so it will fit in 8 bits. */
233 /* Raw LZ match/literal format: just a length and offset.
235 * The length is the number of bytes of the match, and the offset is the number
236 * of bytes back in the input the match is from the matched text.
238 * If @len < LZX_MIN_MATCH, then it's really just a literal byte. */
/* Specification for a LZX block */
struct lzx_block_spec {

	/* Set to 1 if this block has been split (in two --- we only consider
	 * binary splits). In such cases the rest of the fields are
	 * unimportant, since the relevant information is rather in the
	 * structures for the sub-blocks. */

	/* One of the LZX_BLOCKTYPE_* constants indicating which type of this

	/* 0-based position in the window at which this block starts. */

	/* The number of bytes of uncompressed data this block represents. */

	/* The position in the 'chosen_matches' array in the `struct
	 * lzx_compressor' at which the match/literal specifications for
	 * this block begin. */
	unsigned chosen_matches_start_pos;

	/* The number of match/literal specifications for this block. */
	u16 num_chosen_matches;

	/* Huffman codes for this block. */
	struct lzx_codes codes;
/*
 * An array of these structures is used during the match-choosing algorithm.
 * They correspond to consecutive positions in the window and are used to keep
 * track of the cost to reach each position, and the match/literal choices that
 * need to be chosen to reach that position.
 */

	/* The approximate minimum cost, in bits, to reach this position in the
	 * window which has been found so far. */

	/* The union here is just for clarity, since the fields are used in two
	 * slightly different ways. Initially, the @prev structure is filled in
	 * first, and links go from later in the window to earlier in the
	 * window. Later, @next structure is filled in and links go from
	 * earlier in the window to later in the window. */

		/* Position of the start of the match or literal that
		 * was taken to get to this position in the approximate
		 * minimum-cost parse. */

		/* Offset, relative to its starting position, of the
		 * match or literal that was taken to get to this
		 * position in the approximate minimum-cost parse. */

		/* Position at which the match or literal starting at
		 * this position ends in the minimum-cost parse. */

		/* Offset, relative to its starting position, of the
		 * match or literal starting at this position in the
		 * approximate minimum-cost parse. */

#if LZX_PARAM_ACCOUNT_FOR_LRU
	/* Snapshot of the repeat-offset LRU queue at this position, so the
	 * cost of repeat-offset matches can be evaluated per-position.
	 * Only present when LZX_PARAM_ACCOUNT_FOR_LRU is enabled. */
	struct lzx_lru_queue queue;
/* State of the LZX compressor */
struct lzx_compressor {

	/* The parameters that were used to create the compressor. */
	struct wimlib_lzx_params params;

	/* The buffer of data to be compressed.
	 *
	 * 0xe8 byte preprocessing is done directly on the data here before
	 * further compression.
	 *
	 * Note that this compressor does *not* use a sliding window!!!!
	 * It's not needed in the WIM format, since every chunk is compressed
	 * independently. This is by design, to allow random access to the
	 *
	 * We reserve a few extra bytes to potentially allow reading off the end
	 * of the array in the match-finding code for optimization purposes.
	 */
	/* NOTE(review): confirm the match-finder never reads more than 12
	 * bytes past window_size. */
	u8 window[LZX_MAX_WINDOW_SIZE + 12];

	/* Number of bytes of data to be compressed, which is the number of
	 * bytes of data in @window that are actually valid. */
	unsigned window_size;

	/* The current match offset LRU queue. */
	struct lzx_lru_queue queue;

	/* Space for sequence of matches/literals that were chosen.
	 *
	 * Each LZX_MAX_WINDOW_SIZE-sized portion of this array is used for a
	 * different block splitting level. */
	struct lzx_match *chosen_matches;

	/* Structures used during block splitting.
	 *
	 * This can be thought of as a binary tree. block_specs[(1) - 1]
	 * represents to the top-level block (root node), and block_specs[(i*2)
	 * - 1] and block_specs[(i*2+1) - 1] represent the sub-blocks (child
	 * nodes) resulting from a binary split of the block represented by
	 * block_spec[(i) - 1].
	 */
	struct lzx_block_spec *block_specs;

	/* This is simply filled in with zeroes and used to avoid special-casing
	 * the output of the first compressed Huffman code, which conceptually
	 * has a delta taken from a code with all symbols having zero-length
	 */
	struct lzx_codes zero_codes;

	/* Slow algorithm only: The current cost model (codeword lengths used
	 * as bit-cost estimates by the match-chooser). */
	struct lzx_lens costs;

	/* Slow algorithm only: Table that maps the hash codes for 3 character
	 * sequences to the most recent position that sequence (or a sequence
	 * sharing the same hash code) appeared in the window. */

	/* Slow algorithm only: Table that maps 2-character sequences to the
	 * most recent position that sequence appeared in the window. */

	/* Slow algorithm only: Table that contains the logical child pointers
	 * in the binary trees in the match-finding code.
	 *
	 * child_tab[i*2] and child_tab[i*2+1] are the left and right pointers,
	 * respectively, from the binary tree root corresponding to window

	/* Slow algorithm only: Matches that were already found and are saved in
	 * memory for subsequent queries (e.g. when block splitting). */
	struct raw_match *cached_matches;

	/* Slow algorithm only: Next position in 'cached_matches' to either
	 * return or fill in. */
	unsigned cached_matches_pos;

	/* Slow algorithm only: %true if reading from 'cached_matches'; %false
	 * if writing to 'cached_matches'. */
	bool matches_already_found;

	/* Slow algorithm only: Position in window of next match to return. */
	unsigned match_window_pos;

	/* Slow algorithm only: No matches returned shall reach past this
	 */
	unsigned match_window_end;

	/* Slow algorithm only: Temporary space used for match-choosing
	 *
	 * The size of this array must be at least LZX_MAX_MATCH but otherwise
	 * is arbitrary. More space simply allows the match-choosing algorithm
	 * to find better matches (depending on the input, as always). */
	struct lzx_optimal *optimum;

	/* Slow algorithm only: Variables used by the match-choosing algorithm.
	 *
	 * When matches have been chosen, optimum_cur_idx is set to the position
	 * in the window of the next match/literal to return and optimum_end_idx
	 * is set to the position in the window at the end of the last
	 * match/literal to return. */
/* Returns the LZX position slot that corresponds to a given formatted offset.
 *
 * Logically, this returns the smallest i such that
 * formatted_offset >= lzx_position_base[i].
 *
 * The actual implementation below takes advantage of the regularity of the
 * numbers in the lzx_position_base array to calculate the slot directly from
 * the formatted offset without actually looking at the array.
 */
lzx_get_position_slot(unsigned formatted_offset)
	/*
	 * Slots 36-49 (formatted_offset >= 262144) can be found by
	 * (formatted_offset/131072) + 34 == (formatted_offset >> 17) + 34;
	 * however, this check for formatted_offset >= 262144 is commented out
	 * because WIM chunks cannot be that large.
	 */
	/* NOTE(review): the comment above says this check is commented out,
	 * but the check is present --- confirm which is intended (it is
	 * harmless either way for <= 32768-byte WIM chunks). */
	if (formatted_offset >= 262144) {
		return (formatted_offset >> 17) + 34;

		/* Note: this part here only works if:
		 *
		 *    2 <= formatted_offset < 655360
		 *
		 * It is < 655360 because the frequency of the position bases
		 * increases starting at the 655360 entry, and it is >= 2
		 * because the below calculation fails if the most significant
		 * bit is lower than the 2's place. */
		LZX_ASSERT(2 <= formatted_offset && formatted_offset < 655360);
		/* Index of the most-significant set bit, plus the next bit
		 * down, gives the slot directly. */
		unsigned mssb_idx = bsr32(formatted_offset);
		return (mssb_idx << 1) |
			((formatted_offset >> (mssb_idx - 1)) & 1);
/* Compute the hash code for the next 3-character sequence in the window.
 * Bytes are folded in LZX_LZ_HASH_SHIFT bits apart, and the result is masked
 * down to LZX_LZ_HASH_BITS bits so it indexes the hash table directly. */
lzx_lz_compute_hash(const u8 *window)
	hash <<= LZX_LZ_HASH_SHIFT;
	hash <<= LZX_LZ_HASH_SHIFT;
	/* Mask to the hash table size. */
	return hash & LZX_LZ_HASH_MASK;
/* Build the main, length, and aligned offset Huffman codes used in LZX.
 *
 * This takes as input the frequency tables for each code and produces as output
 * a set of tables that map symbols to codewords and lengths. */
lzx_make_huffman_codes(const struct lzx_freqs *freqs,
		       struct lzx_codes *codes)
	/* Main code: codeword lengths limited to LZX_MAX_CODEWORD_LEN bits. */
	make_canonical_huffman_code(LZX_MAINTREE_NUM_SYMBOLS,
				    LZX_MAX_CODEWORD_LEN,
				    codes->codewords.main);

	/* Length code: same length limit as the main code. */
	make_canonical_huffman_code(LZX_LENTREE_NUM_SYMBOLS,
				    LZX_MAX_CODEWORD_LEN,
				    codes->codewords.len);

	/* Aligned offset code: codeword lengths limited to 8 bits. */
	make_canonical_huffman_code(LZX_ALIGNEDTREE_NUM_SYMBOLS, 8,
				    codes->codewords.aligned);
/*
 * Output a LZX match.
 *
 * @out:         The bitstream to write the match to.
 * @block_type:  The type of the LZX block (LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM)
 * @match:       The match, in the 32-bit intermediate representation packed
 *               into 'match.data' (bit 31 set; see the bit layout unpacked
 *               below).
 * @codes:       Pointer to a structure that contains the codewords for the
 *               main, length, and aligned offset Huffman codes.
 */
lzx_write_match(struct output_bitstream *out, int block_type,
		struct lzx_match match, const struct lzx_codes *codes)
	/* low 8 bits are the match length minus 2 */
	unsigned match_len_minus_2 = match.data & 0xff;
	/* Next 17 bits are the position footer */
	unsigned position_footer = (match.data >> 8) & 0x1ffff;	/* 17 bits */
	/* Next 6 bits are the position slot. */
	unsigned position_slot = (match.data >> 25) & 0x3f;	/* 6 bits */

	unsigned len_pos_header;
	unsigned main_symbol;
	unsigned num_extra_bits;
	unsigned verbatim_bits;
	unsigned aligned_bits;

	/* If the match length is less than MIN_MATCH (= 2) +
	 * NUM_PRIMARY_LENS (= 7), the length header contains
	 * the match length minus MIN_MATCH, and there is no
	 *
	 * Otherwise, the length header contains
	 * NUM_PRIMARY_LENS, and the length footer contains
	 * the match length minus NUM_PRIMARY_LENS minus
	 */
	if (match_len_minus_2 < LZX_NUM_PRIMARY_LENS) {
		len_header = match_len_minus_2;
		/* No length footer-- mark it with a special
		 */
		len_footer = (unsigned)(-1);
		len_header = LZX_NUM_PRIMARY_LENS;
		len_footer = match_len_minus_2 - LZX_NUM_PRIMARY_LENS;

	/* Combine the position slot with the length header into
	 * a single symbol that will be encoded with the main
	 */
	len_pos_header = (position_slot << 3) | len_header;

	/* The actual main symbol is offset by LZX_NUM_CHARS because
	 * values under LZX_NUM_CHARS are used to indicate a literal
	 * byte rather than a match. */
	main_symbol = len_pos_header + LZX_NUM_CHARS;

	/* Output main symbol. */
	bitstream_put_bits(out, codes->codewords.main[main_symbol],
			   codes->lens.main[main_symbol]);

	/* If there is a length footer, output it using the
	 * length Huffman code. */
	if (len_footer != (unsigned)(-1)) {
		bitstream_put_bits(out, codes->codewords.len[len_footer],
				   codes->lens.len[len_footer]);

	num_extra_bits = lzx_get_num_extra_bits(position_slot);

	/* For aligned offset blocks with at least 3 extra bits, output the
	 * verbatim bits literally, then the aligned bits encoded using the
	 * aligned offset tree. Otherwise, only the verbatim bits need to be
	 */
	if ((block_type == LZX_BLOCKTYPE_ALIGNED) && (num_extra_bits >= 3)) {
		/* Upper bits of the footer are written verbatim... */
		verbatim_bits = position_footer >> 3;
		bitstream_put_bits(out, verbatim_bits,

		/* ...and the low 3 bits are Huffman-coded with the
		 * aligned offset code. */
		aligned_bits = (position_footer & 7);
		bitstream_put_bits(out,
				   codes->codewords.aligned[aligned_bits],
				   codes->lens.aligned[aligned_bits]);
		/* verbatim bits is the same as the position
		 * footer, in this case. */
		bitstream_put_bits(out, position_footer, num_extra_bits);
/*
 * Build the precode (pretree) used to compress a Huffman code's codeword
 * lengths.  The lengths in @lens[] are run-length encoded, as deltas from
 * @prev_lens[], into @output_syms[], while tallying precode symbol
 * frequencies into @precode_freqs[] and then building the canonical precode
 * itself into @precode_lens[]/@precode_codewords[].
 *
 * Returns the number of entries written to @output_syms[]; if
 * @num_additional_bits_ret is non-NULL it receives the total count of extra
 * literal (non-Huffman-coded) bits that accompany the RLE symbols.
 */
lzx_build_precode(const u8 lens[restrict],
		  const u8 prev_lens[restrict],
		  freq_t precode_freqs[restrict LZX_PRETREE_NUM_SYMBOLS],
		  u8 output_syms[restrict num_syms],
		  u8 precode_lens[restrict LZX_PRETREE_NUM_SYMBOLS],
		  u16 precode_codewords[restrict LZX_PRETREE_NUM_SYMBOLS],
		  unsigned * num_additional_bits_ret)
	unsigned output_syms_idx;
	unsigned cur_run_len;
	unsigned additional_bits;
	unsigned num_additional_bits = 0;

	memset(precode_freqs, 0,
	       LZX_PRETREE_NUM_SYMBOLS * sizeof(precode_freqs[0]));

	/* Since the code word lengths use a form of RLE encoding, the goal here
	 * is to find each run of identical lengths when going through them in
	 * symbol order (including runs of length 1). For each run, as many
	 * lengths are encoded using RLE as possible, and the rest are output
	 *
	 * output_syms[] will be filled in with the length symbols that will be
	 * output, including RLE codes, not yet encoded using the pre-tree.
	 *
	 * cur_run_len keeps track of how many code word lengths are in the
	 * current run of identical lengths.
	 */
	for (i = 1; i <= num_syms; i++) {
		if (i != num_syms && lens[i] == lens[i - 1]) {
			/* Still in a run--- keep going. */

		/* Run ended! Check if it is a run of zeroes or a run of

		/* The symbol that was repeated in the run--- not to be confused
		 * with the length *of* the run (cur_run_len) */
		len_in_run = lens[i - 1];

		if (len_in_run == 0) {
			/* A run of 0's. Encode it in as few length
			 * codes as we can. */

			/* The magic length 18 indicates a run of 20 + n zeroes,
			 * where n is an uncompressed literal 5-bit integer that
			 * follows the magic length. */
			while (cur_run_len >= 20) {
				additional_bits = min(cur_run_len - 20, 0x1f);
				num_additional_bits += 5;
				output_syms[output_syms_idx++] = 18;
				output_syms[output_syms_idx++] = additional_bits;
				cur_run_len -= 20 + additional_bits;

			/* The magic length 17 indicates a run of 4 + n zeroes,
			 * where n is an uncompressed literal 4-bit integer that
			 * follows the magic length. */
			while (cur_run_len >= 4) {
				additional_bits = min(cur_run_len - 4, 0xf);
				num_additional_bits += 4;
				output_syms[output_syms_idx++] = 17;
				output_syms[output_syms_idx++] = additional_bits;
				cur_run_len -= 4 + additional_bits;

			/* A run of nonzero lengths. */

			/* The magic length 19 indicates a run of 4 + n
			 * nonzeroes, where n is a literal bit that follows the
			 * magic length, and where the value of the lengths in
			 * the run is given by an extra length symbol, encoded
			 * with the precode, that follows the literal bit.
			 *
			 * The extra length symbol is encoded as a difference
			 * from the length of the codeword for the first symbol
			 * in the run in the previous tree.
			 */
			while (cur_run_len >= 4) {
				additional_bits = (cur_run_len > 4);
				num_additional_bits += 1;
				delta = (signed char)prev_lens[i - cur_run_len] -
					(signed char)len_in_run;
				precode_freqs[(unsigned char)delta]++;
				output_syms[output_syms_idx++] = 19;
				output_syms[output_syms_idx++] = additional_bits;
				output_syms[output_syms_idx++] = delta;
				cur_run_len -= 4 + additional_bits;

		/* Any remaining lengths in the run are outputted without RLE,
		 * as a difference from the length of that codeword in the
		 */
		while (cur_run_len > 0) {
			delta = (signed char)prev_lens[i - cur_run_len] -
				(signed char)len_in_run;
			precode_freqs[(unsigned char)delta]++;
			output_syms[output_syms_idx++] = delta;

	/* Build the precode from the frequencies of the length symbols. */

	make_canonical_huffman_code(LZX_PRETREE_NUM_SYMBOLS,
				    LZX_MAX_CODEWORD_LEN,
				    precode_freqs, precode_lens,

	if (num_additional_bits_ret)
		*num_additional_bits_ret = num_additional_bits;

	return output_syms_idx;
/*
 * Writes a compressed Huffman code to the output, preceded by the precode for
 *
 * The Huffman code is represented in the output as a series of path lengths
 * from which the canonical Huffman code can be reconstructed. The path lengths
 * themselves are compressed using a separate Huffman code, the precode, which
 * consists of LZX_PRETREE_NUM_SYMBOLS (= 20) symbols that cover all possible
 * code lengths, plus extra codes for repeated lengths. The path lengths of the
 * precode precede the path lengths of the larger code and are uncompressed,
 * consisting of 20 entries of 4 bits each.
 *
 * @out:        Bitstream to write the code to.
 * @lens:       The code lengths for the Huffman code, indexed by symbol.
 * @prev_lens:  Code lengths for this Huffman code, indexed by symbol,
 *              in the *previous block*, or all zeroes if this is the
 *
 * @num_syms:   The number of symbols in the code.
 */
lzx_write_compressed_code(struct output_bitstream *out,
			  const u8 lens[restrict],
			  const u8 prev_lens[restrict],
	freq_t precode_freqs[LZX_PRETREE_NUM_SYMBOLS];
	u8 output_syms[num_syms];
	u8 precode_lens[LZX_PRETREE_NUM_SYMBOLS];
	u16 precode_codewords[LZX_PRETREE_NUM_SYMBOLS];
	unsigned num_output_syms;

	/* RLE-encode the lengths and build the precode over them. */
	num_output_syms = lzx_build_precode(lens,

	/* Write the lengths of the precode codes to the output. */
	for (i = 0; i < LZX_PRETREE_NUM_SYMBOLS; i++)
		bitstream_put_bits(out, precode_lens[i],
				   LZX_PRETREE_ELEMENT_SIZE);

	/* Write the length symbols, encoded with the precode, to the output. */

	for (i = 0; i < num_output_syms; ) {
		precode_sym = output_syms[i++];

		bitstream_put_bits(out, precode_codewords[precode_sym],
				   precode_lens[precode_sym]);
		switch (precode_sym) {
			/* Precode symbol 17 (zero run): 4 extra literal bits
			 * (see lzx_build_precode()). */
			bitstream_put_bits(out, output_syms[i++], 4);
			/* Precode symbol 18 (longer zero run): 5 extra bits. */
			bitstream_put_bits(out, output_syms[i++], 5);
			/* Precode symbol 19 (run of identical nonzero lengths):
			 * 1 extra bit, then the delta symbol, itself encoded
			 * with the precode. */
			bitstream_put_bits(out, output_syms[i++], 1);
			bitstream_put_bits(out,
					   precode_codewords[output_syms[i]],
					   precode_lens[output_syms[i]]);
/* Return the total cost, in bits, of outputting the symbols of a Huffman code
 * whose codeword lengths are @lens[] and whose symbol frequencies are
 * @freqs[]: the sum over all symbols of (codeword length * frequency). */
lzx_huffman_code_output_cost(const u8 lens[restrict],
			     const freq_t freqs[restrict],
	for (unsigned i = 0; i < num_syms; i++)
		cost += (unsigned)lens[i] * (unsigned)freqs[i];
/* Return the number of bits required to output the lengths for the specified
 * Huffman code in compressed format (encoded with a precode). */
lzx_code_cost(const u8 lens[], const u8 prev_lens[], unsigned num_syms)
	u8 output_syms[num_syms];
	freq_t precode_freqs[LZX_PRETREE_NUM_SYMBOLS];
	u8 precode_lens[LZX_PRETREE_NUM_SYMBOLS];
	u16 precode_codewords[LZX_PRETREE_NUM_SYMBOLS];
	unsigned num_additional_bits;

	/* Account for the lengths of the precode itself. */
	cost += LZX_PRETREE_NUM_SYMBOLS * LZX_PRETREE_ELEMENT_SIZE;

	/* Run the same precode construction the real writer
	 * (lzx_write_compressed_code()) performs, to get exact costs. */
	lzx_build_precode(lens, prev_lens, num_syms,
			  precode_freqs, output_syms,
			  precode_lens, precode_codewords,
			  &num_additional_bits);

	/* Account for all precode symbols output. */
	cost += lzx_huffman_code_output_cost(precode_lens, precode_freqs,
					     LZX_PRETREE_NUM_SYMBOLS);

	/* Account for additional bits. */
	cost += num_additional_bits;
/*
 * Writes all compressed matches and literal bytes in a LZX block to the
 *
 * @ostream:      The output bitstream.
 * @block_type:   The type of the block (LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM)
 * @match_tab[]:  The array of matches that will be output. It has length
 *                of @match_count.
 * @match_count:  Number of matches/literals to be output.
 * @codes:        Pointer to a structure that contains the codewords for the
 *                main, length, and aligned offset Huffman codes.
 */
lzx_write_matches_and_literals(struct output_bitstream *ostream,
			       const struct lzx_match match_tab[],
			       unsigned match_count,
			       const struct lzx_codes *codes)
	for (unsigned i = 0; i < match_count; i++) {
		struct lzx_match match = match_tab[i];

		/* High bit of the match indicates whether the match is an
		 * actual match (1) or a literal uncompressed byte (0) */
		if (match.data & 0x80000000) {
			lzx_write_match(ostream, block_type,
			/* Literal: its intermediate representation is the
			 * byte value itself, which is also its symbol in the
			 * main alphabet (see lzx_record_literal()). */
			bitstream_put_bits(ostream,
					   codes->codewords.main[match.data],
					   codes->lens.main[match.data]);
/* Debug check: assert that all codeword lengths in @codes are within the
 * limits the LZX format allows, and that each set of lengths actually forms a
 * decodable Huffman code (by building decode tables from them).  The body
 * compiles to nothing unless ENABLE_LZX_DEBUG is defined. */
lzx_assert_codes_valid(const struct lzx_codes * codes)
#ifdef ENABLE_LZX_DEBUG
	/* Main and length codewords are limited to LZX_MAX_CODEWORD_LEN bits;
	 * aligned-offset codewords to 8 bits. */
	for (i = 0; i < LZX_MAINTREE_NUM_SYMBOLS; i++)
		LZX_ASSERT(codes->lens.main[i] <= LZX_MAX_CODEWORD_LEN);

	for (i = 0; i < LZX_LENTREE_NUM_SYMBOLS; i++)
		LZX_ASSERT(codes->lens.len[i] <= LZX_MAX_CODEWORD_LEN);

	for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++)
		LZX_ASSERT(codes->lens.aligned[i] <= 8);

	/* Verify the lengths are decodable: building a decode table fails
	 * (nonzero) for an over- or under-subscribed code. */
	const unsigned tablebits = 10;
	u16 decode_table[(1 << tablebits) +
			 (2 * max(LZX_MAINTREE_NUM_SYMBOLS, LZX_LENTREE_NUM_SYMBOLS))]
			 _aligned_attribute(DECODE_TABLE_ALIGNMENT);
	LZX_ASSERT(0 == make_huffman_decode_table(decode_table,
						  LZX_MAINTREE_NUM_SYMBOLS,
						  LZX_MAX_CODEWORD_LEN));
	LZX_ASSERT(0 == make_huffman_decode_table(decode_table,
						  LZX_LENTREE_NUM_SYMBOLS,
						  LZX_MAX_CODEWORD_LEN));
	LZX_ASSERT(0 == make_huffman_decode_table(decode_table,
						  LZX_ALIGNEDTREE_NUM_SYMBOLS,
#endif /* ENABLE_LZX_DEBUG */
/* Write a LZX aligned offset or verbatim block to the output.
 *
 * @block_type:          LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM.
 * @block_size:          Number of uncompressed bytes the block represents.
 * @chosen_matches:      Intermediate-format matches/literals for this block.
 * @num_chosen_matches:  Number of entries in @chosen_matches.
 * @codes:               Huffman codes computed for this block.
 * @prev_codes:          Huffman codes of the previously written block (or the
 *                       all-zero codes for the first block), used as the
 *                       delta base when writing the compressed code lengths.
 * @ostream:             The output bitstream.
 */
lzx_write_compressed_block(int block_type,
			   struct lzx_match * chosen_matches,
			   unsigned num_chosen_matches,
			   const struct lzx_codes * codes,
			   const struct lzx_codes * prev_codes,
			   struct output_bitstream * ostream)
	LZX_ASSERT(block_type == LZX_BLOCKTYPE_ALIGNED ||
		   block_type == LZX_BLOCKTYPE_VERBATIM);
	LZX_ASSERT(block_size <= LZX_MAX_WINDOW_SIZE);
	LZX_ASSERT(num_chosen_matches <= LZX_MAX_WINDOW_SIZE);
	lzx_assert_codes_valid(codes);

	/* The first three bits indicate the type of block and are one of the
	 * LZX_BLOCKTYPE_* constants. */
	bitstream_put_bits(ostream, block_type, LZX_BLOCKTYPE_NBITS);

	/* The next bit indicates whether the block size is the default (32768),
	 * indicated by a 1 bit, or whether the block size is given by the next
	 * 16 bits, indicated by a 0 bit. */
	if (block_size == LZX_DEFAULT_BLOCK_SIZE) {
		bitstream_put_bits(ostream, 1, 1);
		bitstream_put_bits(ostream, 0, 1);
		bitstream_put_bits(ostream, block_size, LZX_BLOCKSIZE_NBITS);

	/* Write out lengths of the main code. Note that the LZX specification
	 * incorrectly states that the aligned offset code comes after the
	 * length code, but in fact it is the very first tree to be written
	 * (before the main code). */
	if (block_type == LZX_BLOCKTYPE_ALIGNED)
		for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++)
			bitstream_put_bits(ostream, codes->lens.aligned[i],
					   LZX_ALIGNEDTREE_ELEMENT_SIZE);

	LZX_DEBUG("Writing main code...");

	/* Write the pre-tree and lengths for the first LZX_NUM_CHARS symbols in
	 * the main code, which are the codewords for literal bytes. */
	lzx_write_compressed_code(ostream,
				  prev_codes->lens.main,

	/* Write the pre-tree and lengths for the rest of the main code, which
	 * are the codewords for match headers. */
	lzx_write_compressed_code(ostream,
				  codes->lens.main + LZX_NUM_CHARS,
				  prev_codes->lens.main + LZX_NUM_CHARS,
				  LZX_MAINTREE_NUM_SYMBOLS - LZX_NUM_CHARS);

	LZX_DEBUG("Writing length code...");

	/* Write the pre-tree and lengths for the length code. */
	lzx_write_compressed_code(ostream,
				  prev_codes->lens.len,
				  LZX_LENTREE_NUM_SYMBOLS);

	LZX_DEBUG("Writing matches and literals...");

	/* Write the actual matches and literals. */
	lzx_write_matches_and_literals(ostream, block_type,
				       chosen_matches, num_chosen_matches,

	LZX_DEBUG("Done writing block.");
/* Write the LZX block of index @block_number, or recurse to its children
 * recursively if it is a split block.
 *
 * Return a pointer to the Huffman codes for the last block written.
 */
static struct lzx_codes *
lzx_write_block_recursive(struct lzx_compressor *ctx,
			  unsigned block_number,
			  struct lzx_codes * prev_codes,
			  struct output_bitstream *ostream)
	/* Block numbers are 1-based so the children of block i are blocks
	 * i*2 and i*2 + 1 (see the 'block_specs' comment in
	 * 'struct lzx_compressor'). */
	struct lzx_block_spec *spec = &ctx->block_specs[block_number - 1];

	if (spec->is_split) {
		/* Split block: emit both sub-blocks, threading the previous
		 * codes through so each block's compressed code lengths are
		 * deltas from the block written just before it. */
		prev_codes = lzx_write_block_recursive(ctx, block_number * 2 + 0,
						       prev_codes, ostream);
		prev_codes = lzx_write_block_recursive(ctx, block_number * 2 + 1,
						       prev_codes, ostream);
		LZX_DEBUG("Writing block #%u (type=%d, size=%u, num_chosen_matches=%u)...",
			  block_number, spec->block_type, spec->block_size,
			  spec->num_chosen_matches);
		lzx_write_compressed_block(spec->block_type,
					   &ctx->chosen_matches[spec->chosen_matches_start_pos],
					   spec->num_chosen_matches,
		prev_codes = &spec->codes;
1035 /* Write out the LZX blocks that were computed. */
/* Entry point for block output: block number 1 is the root of the implicit
 * block tree, and ctx->zero_codes (all-zero code lengths) serves as the
 * initial "previous codes" for delta-encoding the first block's Huffman
 * code lengths. */
1037 lzx_write_all_blocks(struct lzx_compressor *ctx, struct output_bitstream *ostream)
1039 lzx_write_block_recursive(ctx, 1, &ctx->zero_codes, ostream);
/* Tally one literal byte in the main-alphabet frequency table and return it
 * in the intermediate match/literal representation (for a literal this is
 * simply the byte value; bit 31 clear marks it as a literal, matching the
 * bit-layout documented in lzx_record_match()).  @_freqs is a
 * struct lzx_freqs *, passed as void * to match a generic callback type. */
1043 lzx_record_literal(u8 literal, void *_freqs)
1045 struct lzx_freqs *freqs = _freqs;
1047 freqs->main[literal]++;
1049 return (u32)literal;
1052 /* Constructs a match from an offset and a length, and updates the LRU queue and
1053 * the frequency of symbols in the main, length, and aligned offset alphabets.
1054 * The return value is a 32-bit number that provides the match in an
1055 * intermediate representation documented below. */
1057 lzx_record_match(unsigned match_offset, unsigned match_len,
1058 void *_freqs, void *_queue)
1060 struct lzx_freqs *freqs = _freqs;
1061 struct lzx_lru_queue *queue = _queue;
1062 unsigned position_slot;
1063 unsigned position_footer = 0;
1066 unsigned len_footer;
1067 unsigned adjusted_match_len;
1069 LZX_ASSERT(match_len >= LZX_MIN_MATCH && match_len <= LZX_MAX_MATCH);
1071 /* If possible, encode this offset as a repeated offset. */
1072 if (match_offset == queue->R0) {
/* A hit on R1 or R2 moves that entry to the front of the queue by
 * swapping it with R0 (LZX's move-to-front behavior for repeat
 * offsets). */
1074 } else if (match_offset == queue->R1) {
1075 swap(queue->R0, queue->R1);
1077 } else if (match_offset == queue->R2) {
1078 swap(queue->R0, queue->R2);
1081 /* Not a repeated offset. */
1083 /* offsets of 0, 1, and 2 are reserved for the repeated offset
1084 * codes, so non-repeated offsets must be encoded as 3+. The
1085 * minimum offset is 1, so encode the offsets offset by 2. */
1086 unsigned formatted_offset = match_offset + 2;
/* Push the new offset onto the front of the LRU queue, evicting
 * the oldest entry (R2). */
1088 queue->R2 = queue->R1;
1089 queue->R1 = queue->R0;
1090 queue->R0 = match_offset;
1092 /* The (now-formatted) offset will actually be encoded as a
1093 * small position slot number that maps to a certain hard-coded
1094 * offset (position base), followed by a number of extra bits---
1095 * the position footer--- that are added to the position base to
1096 * get the original formatted offset. */
1098 position_slot = lzx_get_position_slot(formatted_offset);
1099 position_footer = formatted_offset &
1100 ((1 << lzx_get_num_extra_bits(position_slot)) - 1);
1103 adjusted_match_len = match_len - LZX_MIN_MATCH;
1106 /* The match length must be at least 2, so let the adjusted match length
1107 * be the match length minus 2.
1109 * If it is less than 7, the adjusted match length is encoded as a 3-bit
1110 * number offset by 2. Otherwise, the 3-bit length header is all 1's
1111 * and the actual adjusted length is given as a symbol encoded with the
1112 * length tree, offset by 7.
1114 if (adjusted_match_len < LZX_NUM_PRIMARY_LENS) {
1115 len_header = adjusted_match_len;
1117 len_header = LZX_NUM_PRIMARY_LENS;
1118 len_footer = adjusted_match_len - LZX_NUM_PRIMARY_LENS;
1119 freqs->len[len_footer]++;
/* The main-alphabet match symbol combines the position slot (high bits)
 * with the 3-bit length header, offset past the 256 literal symbols. */
1121 len_pos_header = (position_slot << 3) | len_header;
1123 freqs->main[len_pos_header + LZX_NUM_CHARS]++;
/* Slots with >= 3 extra bits emit their low 3 footer bits through the
 * aligned offset code (only relevant for aligned blocks). */
1126 * if (lzx_extra_bits[position_slot] >= 3) */
1127 if (position_slot >= 8)
1128 freqs->aligned[position_footer & 7]++;
1130 /* Pack the position slot, position footer, and match length into an
1131 * intermediate representation.
1134 * ---- -----------------------------------------------------------
1136 * 31 1 if a match, 0 if a literal.
1138 * 30-25 position slot. This can be at most 50, so it will fit in 6
1141 * 8-24 position footer. This is the offset of the real formatted
1142 * offset from the position base. This can be at most 17 bits
1143 * (since lzx_extra_bits[LZX_NUM_POSITION_SLOTS - 1] is 17).
1145 * 0-7 length of match, offset by 2. This can be at most
1146 * (LZX_MAX_MATCH - 2) == 255, so it will fit in 8 bits. */
1148 (position_slot << 25) |
1149 (position_footer << 8) |
1150 (adjusted_match_len);
1153 /* Set the cost model @ctx->costs from the Huffman codeword lengths specified in
1156 * These are basically the same thing, except that Huffman codewords with length
1157 * 0 corresponds to symbols with zero frequency. These need to be assigned
1158 * actual costs. The specific values assigned are arbitrary, but they should be
1159 * fairly high (near the maximum codeword length) to take into account the fact
1160 * that uses of these symbols are expected to be rare.
/* Copies @lens wholesale, then patches every zero-length entry in each of the
 * three alphabets (main, length, aligned) with its configured "no statistics"
 * fallback cost from ctx->params.slow. */
1163 lzx_set_costs(struct lzx_compressor * ctx, const struct lzx_lens * lens)
1167 memcpy(&ctx->costs, lens, sizeof(struct lzx_lens));
1169 for (i = 0; i < LZX_MAINTREE_NUM_SYMBOLS; i++)
1170 if (ctx->costs.main[i] == 0)
1171 ctx->costs.main[i] = ctx->params.slow.main_nostat_cost;
1173 for (i = 0; i < LZX_LENTREE_NUM_SYMBOLS; i++)
1174 if (ctx->costs.len[i] == 0)
1175 ctx->costs.len[i] = ctx->params.slow.len_nostat_cost;
1177 for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++)
1178 if (ctx->costs.aligned[i] == 0)
1179 ctx->costs.aligned[i] = ctx->params.slow.aligned_nostat_cost;
/* Cost, in bits under the current model, of emitting literal byte @c: simply
 * the (possibly nostat-patched) main-alphabet codeword length for @c. */
1183 lzx_literal_cost(u8 c, const struct lzx_lens * costs)
1185 return costs->main[c];
1188 /* Given a (length, offset) pair that could be turned into a valid LZX match as
1189 * well as costs for the codewords in the main, length, and aligned Huffman
1190 * codes, return the approximate number of bits it will take to represent this
1191 * match in the compressed output. */
1193 lzx_match_cost(unsigned length, unsigned offset, const struct lzx_lens *costs
1195 #if LZX_PARAM_ACCOUNT_FOR_LRU
/* When LRU accounting is enabled, @queue is updated the same way
 * lzx_record_match() would update it, so repeat-offset savings are
 * reflected in the estimate. */
1196 , struct lzx_lru_queue *queue
1200 unsigned position_slot, len_header, main_symbol;
1203 /* Calculate position slot and length header, then combine them into the
1206 #if LZX_PARAM_ACCOUNT_FOR_LRU
1207 if (offset == queue->R0) {
1209 } else if (offset == queue->R1) {
1210 swap(queue->R0, queue->R1);
1212 } else if (offset == queue->R2) {
1213 swap(queue->R0, queue->R2);
/* Non-repeat path: +2 converts the real offset to LZX's "formatted
 * offset" (0..2 are reserved for the repeat-offset codes). */
1217 position_slot = lzx_get_position_slot(offset + 2);
1219 len_header = min(length - LZX_MIN_MATCH, LZX_NUM_PRIMARY_LENS);
1220 main_symbol = ((position_slot << 3) | len_header) + LZX_NUM_CHARS;
1222 /* Account for main symbol. */
1223 cost += costs->main[main_symbol];
1225 /* Account for extra position information. */
1226 unsigned num_extra_bits = lzx_get_num_extra_bits(position_slot);
1227 if (num_extra_bits >= 3) {
1228 cost += num_extra_bits - 3;
/* (offset + LZX_MIN_MATCH) & 7 == low 3 bits of the formatted
 * offset, i.e. the same aligned symbol lzx_record_match() tallies
 * as (position_footer & 7) — LZX_MIN_MATCH is 2 per the
 * BUILD_BUG_ON elsewhere in this file. */
1229 cost += costs->aligned[(offset + LZX_MIN_MATCH) & 7];
1231 cost += num_extra_bits;
1234 /* Account for extra length information. */
1235 if (length - LZX_MIN_MATCH >= LZX_NUM_PRIMARY_LENS)
1236 cost += costs->len[length - LZX_MIN_MATCH - LZX_NUM_PRIMARY_LENS];
1241 /* This procedure effectively creates a new binary tree corresponding to the
1242 * current string at the same time that it searches the existing tree nodes for
/* Binary-tree match finder: positions in @window are nodes, ordered
 * lexicographically by the string starting at each position; @child_tab holds
 * two child links (lesser, greater) per position.  Walking from @cur_match it
 * records every strictly-longer match found into @matches, re-rooting the tree
 * at @strstart as it goes.  Only matches longer than @prev_len are reported. */
1245 lzx_lz_get_matches(const u8 window[restrict],
1246 const unsigned bytes_remaining,
1247 const unsigned strstart,
1248 const unsigned max_length,
1249 u16 child_tab[restrict],
1251 const unsigned prev_len,
1252 struct raw_match * const matches)
1254 u16 *new_tree_lt_ptr = &child_tab[strstart * 2];
1255 u16 *new_tree_gt_ptr = &child_tab[strstart * 2 + 1];
1257 u16 longest_lt_match_len = 0;
1258 u16 longest_gt_match_len = 0;
1260 /* Maximum number of nodes to walk down before stopping */
1261 unsigned depth = max_length;
1263 /* Length of longest match found so far */
1264 unsigned longest_match_len = prev_len;
1266 /* Maximum length of match to return */
1267 unsigned len_limit = min(bytes_remaining, max_length);
1269 /* Number of matches found so far */
1270 unsigned num_matches = 0;
1274 /* Stop if too many nodes were traversed or if there is no next
1276 if (depth-- == 0 || cur_match == 0) {
/* Dead end: terminate both dangling links of the new tree. */
1277 *new_tree_gt_ptr = 0;
1278 *new_tree_lt_ptr = 0;
1282 /* Load the pointers to the children of the binary tree node
1283 * corresponding to the current match */
1284 u16 * const cur_match_ptrs = &child_tab[cur_match * 2];
1286 /* Set up pointers to the current match and to the current
1288 const u8 * const matchptr = &window[cur_match];
1289 const u8 * const strptr = &window[strstart];
/* Both subtrees share at least this prefix length with the current
 * string, so comparison can resume from here rather than byte 0. */
1291 u16 len = min(longest_lt_match_len,
1292 longest_gt_match_len);
1294 if (matchptr[len] == strptr[len]) {
1295 while (++len != len_limit)
1296 if (matchptr[len] != strptr[len])
/* Record only strictly-improving matches, so @matches ends up
 * sorted by increasing length (shortest offset for each length). */
1299 if (len > longest_match_len) {
1300 longest_match_len = len;
1301 matches[num_matches].len = len;
1302 matches[num_matches].offset = strstart - cur_match;
1305 if (len == len_limit) {
1306 /* Length limit was reached. Link left pointer
1307 * in the new tree with left subtree of current
1308 * match tree, and link the right pointer in the
1309 * new tree with the right subtree of the
1310 * current match tree. This in effect deletes
1311 * the node for the current match, which is
1312 * desirable because the current match is the
1313 * same as the current string up until the
1314 * length limit, so in subsequent queries it
1315 * will never be preferable to the current
1317 *new_tree_lt_ptr = cur_match_ptrs[0];
1318 *new_tree_gt_ptr = cur_match_ptrs[1];
1324 if (matchptr[len] < strptr[len]) {
1325 /* Case 1: The current match is lexicographically less
1326 * than the current string.
1328 * Since we are searching the binary tree structures, we
1329 * need to walk down to the *right* subtree of the
1330 * current match's node to get to a match that is
1331 * lexicographically *greater* than the current match
1332 * but still lexicographically *lesser* than the current
1335 * At the same time, we link the entire binary tree
1336 * corresponding to the current match into the
1337 * appropriate place in the new binary tree being built
1338 * for the current string. */
1339 *new_tree_lt_ptr = cur_match;
1340 new_tree_lt_ptr = &cur_match_ptrs[1];
1341 cur_match = *new_tree_lt_ptr;
1342 longest_lt_match_len = len;
1344 /* Case 2: The current match is lexicographically
1345 * greater than the current string.
1347 * This is analogous to Case 1 above, but everything
1348 * happens in the other direction.
1350 *new_tree_gt_ptr = cur_match;
1351 new_tree_gt_ptr = &cur_match_ptrs[0];
1352 cur_match = *new_tree_gt_ptr;
1353 longest_gt_match_len = len;
1358 /* Equivalent to lzx_lz_get_matches(), but only updates the tree and doesn't
1359 * return matches. See that function for details (including comments). */
/* Used when positions are skipped over (e.g. inside an already-chosen match):
 * the binary search tree must still be kept consistent for later queries even
 * though no matches are reported from these positions. */
1361 lzx_lz_skip_matches(const u8 window[restrict],
1362 const unsigned bytes_remaining,
1363 const unsigned strstart,
1364 const unsigned max_length,
1365 u16 child_tab[restrict],
1367 const unsigned prev_len)
1369 u16 *new_tree_lt_ptr = &child_tab[strstart * 2];
1370 u16 *new_tree_gt_ptr = &child_tab[strstart * 2 + 1];
1372 u16 longest_lt_match_len = 0;
1373 u16 longest_gt_match_len = 0;
1375 unsigned depth = max_length;
1377 unsigned longest_match_len = prev_len;
1379 unsigned len_limit = min(bytes_remaining, max_length);
1382 if (depth-- == 0 || cur_match == 0) {
1383 *new_tree_gt_ptr = 0;
1384 *new_tree_lt_ptr = 0;
1388 u16 * const cur_match_ptrs = &child_tab[cur_match * 2];
1390 const u8 * const matchptr = &window[cur_match];
1391 const u8 * const strptr = &window[strstart];
1393 u16 len = min(longest_lt_match_len,
1394 longest_gt_match_len);
1396 if (matchptr[len] == strptr[len]) {
1397 while (++len != len_limit)
1398 if (matchptr[len] != strptr[len])
/* Track the longest length only to mirror the search behavior of
 * lzx_lz_get_matches(); nothing is recorded here. */
1401 if (len > longest_match_len) {
1402 longest_match_len = len;
1404 if (len == len_limit) {
1405 *new_tree_lt_ptr = cur_match_ptrs[0];
1406 *new_tree_gt_ptr = cur_match_ptrs[1];
1412 if (matchptr[len] < strptr[len]) {
1413 *new_tree_lt_ptr = cur_match;
1414 new_tree_lt_ptr = &cur_match_ptrs[1];
1415 cur_match = *new_tree_lt_ptr;
1416 longest_lt_match_len = len;
1418 *new_tree_gt_ptr = cur_match;
1419 new_tree_gt_ptr = &cur_match_ptrs[0];
1420 cur_match = *new_tree_gt_ptr;
1421 longest_gt_match_len = len;
1427 lzx_lz_get_matches_caching(struct lzx_compressor *ctx,
1428 struct raw_match **matches_ret);
1430 /* Tell the match-finder to skip the specified number of bytes (@n) in the
1433 lzx_lz_skip_bytes(struct lzx_compressor *ctx, unsigned n)
1436 #if LZX_PARAM_DONT_SKIP_MATCHES
1437 /* Option 1: Still cache the matches from the positions skipped. They
1438 * will then be available in later passes. */
1439 struct raw_match *matches;
1441 lzx_lz_get_matches_caching(ctx, &matches);
1443 /* Option 2: Simply mark the positions skipped as having no matches
1445 LZX_ASSERT(n <= ctx->match_window_end - ctx->match_window_pos);
/* On re-parse passes the cache already holds an entry per position:
 * each entry's .offset doubles as a sanity tag (the position it was
 * recorded at), and .len is the number of cached matches that follow,
 * so the cursor skips len+1 slots per position. */
1446 if (ctx->matches_already_found) {
1448 LZX_ASSERT(ctx->cached_matches[ctx->cached_matches_pos].offset ==
1449 ctx->match_window_pos);
1450 ctx->cached_matches_pos += ctx->cached_matches[ctx->cached_matches_pos].len + 1;
1451 ctx->match_window_pos++;
/* First pass: even though matches are not searched for at skipped
 * positions, the digram table and the binary search trees must still
 * be updated so later positions can find matches back into this
 * region. */
1455 if (ctx->params.slow.use_len2_matches &&
1456 ctx->match_window_end - ctx->match_window_pos >= 2) {
1457 unsigned c1 = ctx->window[ctx->match_window_pos];
1458 unsigned c2 = ctx->window[ctx->match_window_pos + 1];
1459 unsigned digram = c1 | (c2 << 8);
1460 ctx->digram_tab[digram] = ctx->match_window_pos;
1462 if (ctx->match_window_end - ctx->match_window_pos >= 3) {
1466 hash = lzx_lz_compute_hash(&ctx->window[ctx->match_window_pos]);
1468 cur_match = ctx->hash_tab[hash];
1469 ctx->hash_tab[hash] = ctx->match_window_pos;
1471 lzx_lz_skip_matches(ctx->window,
1472 ctx->match_window_end - ctx->match_window_pos,
1473 ctx->match_window_pos,
1474 ctx->params.slow.num_fast_bytes,
/* Record an empty (len == 0) cache entry for this position. */
1478 ctx->cached_matches[ctx->cached_matches_pos].len = 0;
1479 ctx->cached_matches[ctx->cached_matches_pos].offset = ctx->match_window_pos;
1480 ctx->cached_matches_pos++;
1481 ctx->match_window_pos++;
1484 #endif /* !LZX_PARAM_DONT_SKIP_MATCHES */
1487 /* Retrieve a list of matches available at the next position in the input.
1489 * The return value is the number of matches found, and a pointer to them is
1490 * written to @matches_ret. The matches will be sorted in order by length.
1492 * This is essentially a wrapper around lzx_lz_get_matches() that caches its
1493 * output the first time and also performs the needed hashing.
1496 lzx_lz_get_matches_caching(struct lzx_compressor *ctx,
1497 struct raw_match **matches_ret)
1499 unsigned num_matches;
1500 struct raw_match *matches;
1502 LZX_ASSERT(ctx->match_window_end >= ctx->match_window_pos);
/* Slot 0 at cached_matches_pos is the per-position header (len = count,
 * offset = position tag); the matches themselves start at +1. */
1504 matches = &ctx->cached_matches[ctx->cached_matches_pos + 1];
1506 if (ctx->matches_already_found) {
/* Cache hit (second and later optimization passes): reuse the
 * matches found on the first pass, clamping lengths that would now
 * run past the (possibly smaller) match window end. */
1507 num_matches = ctx->cached_matches[ctx->cached_matches_pos].len;
1508 LZX_ASSERT(ctx->cached_matches[ctx->cached_matches_pos].offset == ctx->match_window_pos);
1510 for (int i = (int)num_matches - 1; i >= 0; i--) {
1511 if (ctx->match_window_pos + matches[i].len > ctx->match_window_end)
1512 matches[i].len = ctx->match_window_end - ctx->match_window_pos;
1517 unsigned prev_len = 1;
/* NOTE(review): this local declaration shadows the @matches_ret
 * parameter for the rest of this branch — apparently intentional
 * (it is used as a write cursor into the cache), but the reuse of
 * the name is confusing; confirm and consider renaming. */
1518 struct raw_match * matches_ret = &ctx->cached_matches[ctx->cached_matches_pos + 1];
/* Length-2 matches come from a simple digram table rather than the
 * binary-tree finder.  Presumably the third-byte inequality test
 * below exists because a digram whose third byte also matches will
 * be found as a length>=3 match by the tree search anyway —
 * TODO confirm against the full source. */
1521 if (ctx->params.slow.use_len2_matches &&
1522 ctx->match_window_end - ctx->match_window_pos >= 3) {
1523 unsigned c1 = ctx->window[ctx->match_window_pos];
1524 unsigned c2 = ctx->window[ctx->match_window_pos + 1];
1525 unsigned digram = c1 | (c2 << 8);
1528 cur_match = ctx->digram_tab[digram];
1529 ctx->digram_tab[digram] = ctx->match_window_pos;
1530 if (cur_match != 0 &&
1531 ctx->window[cur_match + 2] != ctx->window[ctx->match_window_pos + 2])
1533 matches_ret->len = 2;
1534 matches_ret->offset = ctx->match_window_pos - cur_match;
1540 if (ctx->match_window_end - ctx->match_window_pos >= 3) {
1544 hash = lzx_lz_compute_hash(&ctx->window[ctx->match_window_pos]);
1546 cur_match = ctx->hash_tab[hash];
1547 ctx->hash_tab[hash] = ctx->match_window_pos;
1548 num_matches += lzx_lz_get_matches(ctx->window,
1549 ctx->match_window_end - ctx->match_window_pos,
1550 ctx->match_window_pos,
1551 ctx->params.slow.num_fast_bytes,
/* Store the per-position cache header for later passes. */
1558 ctx->cached_matches[ctx->cached_matches_pos].len = num_matches;
1559 ctx->cached_matches[ctx->cached_matches_pos].offset = ctx->match_window_pos;
1562 struct raw_match *longest_match_ptr =
1563 &ctx->cached_matches[ctx->cached_matches_pos + 1 +
1565 u16 len = longest_match_ptr->len;
1567 /* If the longest match returned by the match-finder
1568 * reached the number of fast bytes, extend it as much
1570 if (len == ctx->params.slow.num_fast_bytes) {
1571 const unsigned maxlen =
1572 min(ctx->match_window_end - ctx->match_window_pos,
1575 const u8 * const matchptr =
1576 &ctx->window[ctx->match_window_pos - longest_match_ptr->offset];
1578 const u8 * const strptr =
1579 &ctx->window[ctx->match_window_pos];
1581 while (len < maxlen && matchptr[len] == strptr[len])
1584 longest_match_ptr->len = len;
1587 ctx->cached_matches_pos += num_matches + 1;
1588 *matches_ret = matches;
1592 for (unsigned i = 0; i < num_matches; i++)
1594 printf("Len %u Offset %u\n", matches[i].len, matches[i].offset);
/* Debug-only self-check: every returned match must really occur in the
 * window at the claimed offset and fit in the remaining region. */
1598 for (unsigned i = 0; i < num_matches; i++) {
1599 LZX_ASSERT(matches[i].len <= LZX_MAX_MATCH);
1600 if (matches[i].len >= LZX_MIN_MATCH) {
1601 LZX_ASSERT(matches[i].offset <= ctx->match_window_pos);
1602 LZX_ASSERT(matches[i].len <= ctx->match_window_end - ctx->match_window_pos);
1603 LZX_ASSERT(!memcmp(&ctx->window[ctx->match_window_pos],
1604 &ctx->window[ctx->match_window_pos - matches[i].offset],
1609 ctx->match_window_pos++;
1614 * Reverse the linked list of near-optimal matches so that they can be returned
1615 * in forwards order.
1617 * Returns the first match in the list.
/* The optimal-parse array stores back-links (prev) built while walking
 * forward; this pass walks from @cur_pos back to position 0, rewriting each
 * node's next-link/next-offset so the chosen sequence can be replayed front
 * to back via ctx->optimum_cur_idx / ctx->optimum_end_idx. */
1619 static struct raw_match
1620 lzx_lz_reverse_near_optimal_match_list(struct lzx_compressor *ctx,
1623 unsigned prev_link, saved_prev_link;
1624 unsigned prev_match_offset, saved_prev_match_offset;
1626 ctx->optimum_end_idx = cur_pos;
/* Save the current node's back-link before overwriting it with the
 * forward link; classic in-place list reversal. */
1628 saved_prev_link = ctx->optimum[cur_pos].prev.link;
1629 saved_prev_match_offset = ctx->optimum[cur_pos].prev.match_offset;
1632 prev_link = saved_prev_link;
1633 prev_match_offset = saved_prev_match_offset;
1635 saved_prev_link = ctx->optimum[prev_link].prev.link;
1636 saved_prev_match_offset = ctx->optimum[prev_link].prev.match_offset;
1638 ctx->optimum[prev_link].next.link = cur_pos;
1639 ctx->optimum[prev_link].next.match_offset = prev_match_offset;
1641 cur_pos = prev_link;
1642 } while (cur_pos != 0);
1644 ctx->optimum_cur_idx = ctx->optimum[0].next.link;
/* First element: its length is the distance from position 0 to the first
 * forward link (a link distance of 1 denotes a literal). */
1646 return (struct raw_match)
1647 { .len = ctx->optimum_cur_idx,
1648 .offset = ctx->optimum[0].next.match_offset,
1653 * lzx_lz_get_near_optimal_match() -
1655 * Choose the "best" match or literal to use at the next position in the input.
1657 * Unlike a "greedy" parser that always takes the longest match, or even a
1658 * parser with one match/literal look-ahead like zlib, the algorithm used here
1659 * may look ahead many matches/literals to determine the best match/literal to
1660 * output next. The motivation is that the compression ratio is improved if the
1661 * compressor can do things like use a shorter-than-possible match in order to
1662 * allow a longer match later, and also take into account the Huffman code cost
1663 * model rather than simply assuming that longer is better. It is not a true
1664 * "optimal" parser, however, since some shortcuts can be taken; for example, if
1665 * a match is very long, the parser just chooses it immediately before too much
1666 * time is wasted considering many different alternatives that are unlikely to
1669 * This algorithm is based on that used in 7-Zip's deflate encoder.
1671 * Each call to this function does one of two things:
1673 * 1. Build a near-optimal sequence of matches/literals, up to some point, that
1674 * will be returned by subsequent calls to this function, then return the
1679 * 2. Return the next match/literal previously computed by a call to this
1682 * This function relies on the following state in the compressor context:
1684 * ctx->window (read-only: preprocessed data being compressed)
1685 * ctx->cost (read-only: cost model to use)
1686 * ctx->optimum (internal state; leave uninitialized)
1687 * ctx->optimum_cur_idx (must set to 0 before first call)
1688 * ctx->optimum_end_idx (must set to 0 before first call)
1689 * ctx->hash_tab (must set to 0 before first call)
1690 * ctx->cached_matches (internal state; leave uninitialized)
1691 * ctx->cached_matches_pos (initialize to 0 before first call; save and
1692 * restore value if restarting parse from a
1694 * ctx->match_window_pos (must initialize to position of next match to
1695 * return; subsequent calls return subsequent
1697 * ctx->match_window_end (must initialize to limit of match-finding region;
1698 * subsequent calls use the same limit)
1700 * The return value is a (length, offset) pair specifying the match or literal
1703 static struct raw_match
1704 lzx_lz_get_near_optimal_match(struct lzx_compressor * ctx)
1707 /* Testing: literals only */
1708 ctx->match_window_pos++;
1709 return (struct raw_match) { .len = 0 };
1711 /* Testing: greedy parsing */
1712 struct raw_match *matches;
1713 unsigned num_matches;
1714 struct raw_match match = {.len = 0};
1716 num_matches = lzx_lz_get_matches_caching(ctx, &matches);
1718 match = matches[num_matches - 1];
1719 lzx_lz_skip_bytes(ctx, match.len - 1);
1723 unsigned num_possible_matches;
1724 struct raw_match *possible_matches;
1725 struct raw_match match;
1726 unsigned longest_match_len;
1727 unsigned len, match_idx;
1729 if (ctx->optimum_cur_idx != ctx->optimum_end_idx) {
1730 /* Case 2: Return the next match/literal already found. */
/* A forward link of distance 1 denotes a literal; longer
 * distances are matches with the stored offset. */
1731 match.len = ctx->optimum[ctx->optimum_cur_idx].next.link -
1732 ctx->optimum_cur_idx;
1733 match.offset = ctx->optimum[ctx->optimum_cur_idx].next.match_offset;
1735 ctx->optimum_cur_idx = ctx->optimum[ctx->optimum_cur_idx].next.link;
1739 /* Case 1: Compute a new list of matches/literals to return. */
1741 ctx->optimum_cur_idx = 0;
1742 ctx->optimum_end_idx = 0;
1744 /* Get matches at this position. */
1745 num_possible_matches = lzx_lz_get_matches_caching(ctx, &possible_matches);
1747 /* If no matches found, return literal. */
1748 if (num_possible_matches == 0)
1749 return (struct raw_match){ .len = 0 };
1751 /* The matches that were found are sorted by length. Get the length of
1752 * the longest one. */
1753 longest_match_len = possible_matches[num_possible_matches - 1].len;
1755 /* Greedy heuristic: if the longest match that was found is greater
1756 * than LZX_PARAM_NUM_FAST_BYTES, return it immediately; don't bother
1757 * doing more work. */
1758 if (longest_match_len > ctx->params.slow.num_fast_bytes) {
1759 lzx_lz_skip_bytes(ctx, longest_match_len - 1);
1760 return possible_matches[num_possible_matches - 1];
1763 /* Calculate the cost to reach the next position by outputting a
1765 #if LZX_PARAM_ACCOUNT_FOR_LRU
1766 ctx->optimum[0].queue = ctx->queue;
1767 ctx->optimum[1].queue = ctx->optimum[0].queue;
1769 ctx->optimum[1].cost = lzx_literal_cost(ctx->window[ctx->match_window_pos],
1771 ctx->optimum[1].prev.link = 0;
1773 /* Calculate the cost to reach any position up to and including that
1774 * reached by the longest match, using the shortest (i.e. closest) match
1775 * that reaches each position. */
1777 BUILD_BUG_ON(LZX_MIN_MATCH != 2);
1778 for (len = LZX_MIN_MATCH; len <= longest_match_len; len++) {
1780 LZX_ASSERT(match_idx < num_possible_matches);
1782 #if LZX_PARAM_ACCOUNT_FOR_LRU
1783 ctx->optimum[len].queue = ctx->optimum[0].queue;
1785 ctx->optimum[len].prev.link = 0;
1786 ctx->optimum[len].prev.match_offset = possible_matches[match_idx].offset;
1787 ctx->optimum[len].cost = lzx_match_cost(len,
1788 possible_matches[match_idx].offset,
1790 #if LZX_PARAM_ACCOUNT_FOR_LRU
1791 , &ctx->optimum[len].queue
/* possible_matches is sorted by length; advance to the next
 * (longer, usually farther) candidate once its length is
 * exhausted. */
1794 if (len == possible_matches[match_idx].len)
1798 unsigned cur_pos = 0;
1800 /* len_end: greatest index forward at which costs have been calculated
1802 unsigned len_end = longest_match_len;
1806 /* Advance to next position. */
/* Terminate the parse when the frontier is reached or the optimum
 * array would overflow. */
1809 if (cur_pos == len_end || cur_pos == LZX_PARAM_OPTIM_ARRAY_SIZE)
1810 return lzx_lz_reverse_near_optimal_match_list(ctx, cur_pos);
1812 /* retrieve the number of matches available at this position */
1813 num_possible_matches = lzx_lz_get_matches_caching(ctx,
1816 unsigned new_len = 0;
1818 if (num_possible_matches != 0) {
1819 new_len = possible_matches[num_possible_matches - 1].len;
1821 /* Greedy heuristic: if we found a match greater than
1822 * LZX_PARAM_NUM_FAST_BYTES, stop immediately. */
1823 if (new_len > ctx->params.slow.num_fast_bytes) {
1825 /* Build the list of matches to return and get
1827 match = lzx_lz_reverse_near_optimal_match_list(ctx, cur_pos);
1829 /* Append the long match to the end of the list. */
1830 ctx->optimum[cur_pos].next.match_offset =
1831 possible_matches[num_possible_matches - 1].offset;
1832 ctx->optimum[cur_pos].next.link = cur_pos + new_len;
1833 ctx->optimum_end_idx = cur_pos + new_len;
1835 /* Skip over the remaining bytes of the long match. */
1836 lzx_lz_skip_bytes(ctx, new_len - 1);
1838 /* Return first match in the list */
1843 /* Consider proceeding with a literal byte. */
1844 u32 cur_cost = ctx->optimum[cur_pos].cost;
1845 u32 cur_plus_literal_cost = cur_cost +
1846 lzx_literal_cost(ctx->window[ctx->match_window_pos - 1],
1848 if (cur_plus_literal_cost < ctx->optimum[cur_pos + 1].cost) {
1849 ctx->optimum[cur_pos + 1].cost = cur_plus_literal_cost;
1850 ctx->optimum[cur_pos + 1].prev.link = cur_pos;
1851 #if LZX_PARAM_ACCOUNT_FOR_LRU
1852 ctx->optimum[cur_pos + 1].queue = ctx->optimum[cur_pos].queue;
1856 if (num_possible_matches == 0)
1859 /* Consider proceeding with a match. */
/* Newly reachable positions start at "infinite" cost so any real
 * path relaxes them. */
1861 while (len_end < cur_pos + new_len)
1862 ctx->optimum[++len_end].cost = ~(u32)0;
1865 for (len = LZX_MIN_MATCH; len <= new_len; len++) {
1866 LZX_ASSERT(match_idx < num_possible_matches);
1867 #if LZX_PARAM_ACCOUNT_FOR_LRU
/* lzx_match_cost() mutates the queue, so give it a private
 * copy that is only committed if this path is cheaper. */
1868 struct lzx_lru_queue q = ctx->optimum[cur_pos].queue;
1870 u32 cost = cur_cost + lzx_match_cost(len,
1871 possible_matches[match_idx].offset,
1873 #if LZX_PARAM_ACCOUNT_FOR_LRU
1878 if (cost < ctx->optimum[cur_pos + len].cost) {
1879 ctx->optimum[cur_pos + len].cost = cost;
1880 ctx->optimum[cur_pos + len].prev.link = cur_pos;
1881 ctx->optimum[cur_pos + len].prev.match_offset =
1882 possible_matches[match_idx].offset;
1883 #if LZX_PARAM_ACCOUNT_FOR_LRU
1884 ctx->optimum[cur_pos + len].queue = q;
1888 if (len == possible_matches[match_idx].len)
1895 /* Account for extra bits in the main symbols. */
/* Folds the per-symbol extra position bits into a copy of the main code
 * lengths so that symbol-frequency-weighted cost sums include them; used by
 * lzx_compute_compressed_block_costs() to compare aligned vs. verbatim. */
1897 lzx_update_mainsym_match_costs(int block_type,
1898 u8 main_lens[LZX_MAINTREE_NUM_SYMBOLS])
1902 LZX_ASSERT(block_type == LZX_BLOCKTYPE_ALIGNED ||
1903 block_type == LZX_BLOCKTYPE_VERBATIM);
1905 for (i = LZX_NUM_CHARS; i < LZX_MAINTREE_NUM_SYMBOLS; i++) {
/* Match symbols encode the position slot in the bits above the
 * 3-bit length header; mask mirrors (position_slot << 3) packing. */
1906 unsigned position_slot = (i >> 3) & 0x1f;
1908 /* If it's a verbatim block, add the number of extra bits
1909 * corresponding to the position slot.
1911 * If it's an aligned block and there would normally be at least
1912 * 3 extra bits, count 3 less because they will be output as an
1913 * aligned offset symbol instead. */
1914 unsigned num_extra_bits = lzx_get_num_extra_bits(position_slot);
1916 if (block_type == LZX_BLOCKTYPE_ALIGNED && num_extra_bits >= 3)
1917 num_extra_bits -= 3;
1918 main_lens[i] += num_extra_bits;
1923 * Compute the costs, in bits, to output a compressed block as aligned offset
1927 * Number of bytes of uncompressed data this block represents.
1929 * Huffman codes that will be used to output the block.
1931 * Huffman codes for the previous block, or all zeroes if this is the first
1934 * Frequencies of Huffman symbol that will be output in the block.
1936 * Cost of aligned block will be returned here.
1937 * @verbatim_cost_ret
1938 * Cost of verbatim block will be returned here.
/* Costs common to both block types are accumulated in common_cost and added
 * into both results at the end; only the aligned-offset code output and the
 * extra-bit accounting differ between the two block types. */
1941 lzx_compute_compressed_block_costs(unsigned block_size,
1942 const struct lzx_codes *codes,
1943 const struct lzx_codes *prev_codes,
1944 const struct lzx_freqs *freqs,
1945 unsigned * aligned_cost_ret,
1946 unsigned * verbatim_cost_ret)
1948 unsigned common_cost = 0;
1949 unsigned aligned_cost = 0;
1950 unsigned verbatim_cost = 0;
1952 u8 updated_main_lens[LZX_MAINTREE_NUM_SYMBOLS];
1954 /* Account for cost of block header. */
1955 common_cost += LZX_BLOCKTYPE_NBITS;
/* The default block size is signaled with a single flag bit; any other
 * size must be written out explicitly. */
1956 if (block_size == LZX_DEFAULT_BLOCK_SIZE)
1959 common_cost += LZX_BLOCKSIZE_NBITS;
1961 /* Account for cost of outputting aligned offset code. */
1962 aligned_cost += LZX_ALIGNEDTREE_NUM_SYMBOLS * LZX_ALIGNEDTREE_ELEMENT_SIZE;
1964 /* Account for cost of outputting main and length codes. */
1965 common_cost += lzx_code_cost(codes->lens.main,
1966 prev_codes->lens.main,
1968 common_cost += lzx_code_cost(codes->lens.main + LZX_NUM_CHARS,
1969 prev_codes->lens.main + LZX_NUM_CHARS,
1970 LZX_MAINTREE_NUM_SYMBOLS - LZX_NUM_CHARS);
1971 common_cost += lzx_code_cost(codes->lens.len,
1972 prev_codes->lens.len,
1973 LZX_LENTREE_NUM_SYMBOLS);
1975 /* Account for cost to output main, length, and aligned symbols, taking
1976 * into account extra position bits. */
1978 memcpy(updated_main_lens, codes->lens.main, LZX_MAINTREE_NUM_SYMBOLS);
1979 lzx_update_mainsym_match_costs(LZX_BLOCKTYPE_VERBATIM, updated_main_lens);
1980 verbatim_cost += lzx_huffman_code_output_cost(updated_main_lens,
1982 LZX_MAINTREE_NUM_SYMBOLS);
1983 memcpy(updated_main_lens, codes->lens.main, LZX_MAINTREE_NUM_SYMBOLS);
1984 lzx_update_mainsym_match_costs(LZX_BLOCKTYPE_ALIGNED, updated_main_lens);
1985 aligned_cost += lzx_huffman_code_output_cost(updated_main_lens,
1987 LZX_MAINTREE_NUM_SYMBOLS);
1989 common_cost += lzx_huffman_code_output_cost(codes->lens.len,
1991 LZX_LENTREE_NUM_SYMBOLS);
1993 aligned_cost += lzx_huffman_code_output_cost(codes->lens.aligned,
1995 LZX_ALIGNEDTREE_NUM_SYMBOLS);
1997 *aligned_cost_ret = aligned_cost + common_cost;
1998 *verbatim_cost_ret = verbatim_cost + common_cost;
2001 /* Prepare a (nonsplit) compressed block. */
/* Runs the iterative match-choosing passes for one block, then decides
 * whether to emit it as an aligned-offset or verbatim block based on the
 * estimated bit cost of each.  Saves the chosen matches/literals (last pass
 * only) in intermediate form for lzx_write_compressed_block() to consume. */
2003 lzx_prepare_compressed_block(struct lzx_compressor *ctx, unsigned block_number,
2004 struct lzx_codes *prev_codes)
2006 struct lzx_block_spec *spec = &ctx->block_specs[block_number - 1];
/* Snapshot the cache cursor and LRU queue so each optimization pass
 * re-parses the same region from identical starting state. */
2007 unsigned orig_cached_matches_pos = ctx->cached_matches_pos;
2008 struct lzx_lru_queue orig_queue = ctx->queue;
2009 struct lzx_freqs freqs;
2012 /* Here's where the real work happens. The following loop runs one or
2013 * more times, each time using a cost model based on the Huffman codes
2014 * computed from the previous iteration (the first iteration uses a
2015 * default model). Each iteration of the loop uses a heuristic
2016 * algorithm to divide the block into near-optimal matches/literals from
2017 * beginning to end. */
2018 LZX_ASSERT(ctx->params.slow.num_optim_passes >= 1);
2019 spec->num_chosen_matches = 0;
2020 for (unsigned pass = 0; pass < ctx->params.slow.num_optim_passes; pass++)
2022 LZX_DEBUG("Block %u: Match-choosing pass %u of %u",
2023 block_number, pass + 1,
2024 ctx->params.slow.num_optim_passes);
2026 /* Reset frequency tables. */
2027 memset(&freqs, 0, sizeof(freqs));
2029 /* Reset match offset LRU queue. */
2030 ctx->queue = orig_queue;
2032 /* Reset match-finding position. */
2033 ctx->cached_matches_pos = orig_cached_matches_pos;
2034 ctx->match_window_pos = spec->window_pos;
2035 ctx->match_window_end = spec->window_pos + spec->block_size;
2037 /* Set cost model. */
2038 lzx_set_costs(ctx, &spec->codes.lens);
2040 unsigned window_pos = spec->window_pos;
2041 unsigned end = window_pos + spec->block_size;
2043 while (window_pos < end) {
2044 struct raw_match match;
2045 struct lzx_match lzx_match;
2047 match = lzx_lz_get_near_optimal_match(ctx);
2049 if (match.len >= LZX_MIN_MATCH) {
2051 /* Best to output a match here. */
2053 LZX_ASSERT(match.len <= LZX_MAX_MATCH);
2054 LZX_ASSERT(!memcmp(&ctx->window[window_pos],
2055 &ctx->window[window_pos - match.offset],
2058 /* Tally symbol frequencies. */
2059 lzx_match.data = lzx_record_match(match.offset,
2064 window_pos += match.len;
2066 /* Best to output a literal here. */
2068 /* Tally symbol frequencies. */
2069 lzx_match.data = lzx_record_literal(ctx->window[window_pos],
2075 /* If it's the last pass, save the match/literal in
2076 * intermediate form. */
2077 if (pass == ctx->params.slow.num_optim_passes - 1) {
2078 ctx->chosen_matches[spec->chosen_matches_start_pos +
2079 spec->num_chosen_matches] = lzx_match;
2081 spec->num_chosen_matches++;
2084 LZX_ASSERT(window_pos == end);
2086 /* Build Huffman codes using the new frequencies. */
2087 lzx_make_huffman_codes(&freqs, &spec->codes);
2089 /* The first time we get here is when the full input has been
2090 * processed, so the match-finding is done. */
2091 ctx->matches_already_found = true;
2094 LZX_DEBUG("Block %u: saved %u matches/literals @ %u",
2095 block_number, spec->num_chosen_matches,
2096 spec->chosen_matches_start_pos);
2098 unsigned aligned_cost;
2099 unsigned verbatim_cost;
2101 lzx_compute_compressed_block_costs(spec->block_size,
2108 /* Choose whether to make the block aligned offset or verbatim. */
2109 if (aligned_cost < verbatim_cost) {
2110 spec->block_type = LZX_BLOCKTYPE_ALIGNED;
2111 cost = aligned_cost;
2112 LZX_DEBUG("Using aligned block (cost %u vs %u for verbatim)",
2113 aligned_cost, verbatim_cost);
2115 spec->block_type = LZX_BLOCKTYPE_VERBATIM;
2116 cost = verbatim_cost;
2117 LZX_DEBUG("Using verbatim block (cost %u vs %u for aligned)",
2118 verbatim_cost, aligned_cost);
2121 LZX_DEBUG("Block %u is %u => %u bytes unsplit.",
2122 block_number, spec->block_size, cost / 8);
2128 * lzx_prepare_block_recursive() -
2130 * Given a (possibly non-proper) sub-sequence of the preprocessed input, compute
2131 * the LZX block(s) that it should be output as.
2133 * This function initially considers the case where the given sub-sequence of
2134 * the preprocessed input is output as a single block. This block is calculated
2135 * and its cost (number of bits required to output it) is computed.
2137 * Then, if @max_split_level is greater than zero, a split into two evenly sized
2138 * subblocks is considered. The block is recursively split in this way,
2139 * potentially up to the depth specified by @max_split_level. The cost of the
2140 * split block is compared to the cost of the single block, and the lower cost
2143 * For each compressed output block computed, the sequence of matches/literals
2144 * and the corresponding Huffman codes for the block are produced and saved.
2146 * The return value is the approximate number of bits the block (or all
2147 * subblocks, in the case that the split block had lower cost), will take up
2148 * when written to the compressed output.
2151 lzx_prepare_block_recursive(struct lzx_compressor * ctx,
2152 unsigned block_number,
2153 unsigned max_split_level,
2154 struct lzx_codes **prev_codes_p)
/* Block numbers form an implicit binary tree: block n is stored at
 * index n - 1 in ctx->block_specs, and its two halves (if split) are
 * blocks 2n and 2n + 1. */
2156 struct lzx_block_spec *spec = &ctx->block_specs[block_number - 1];
2158 unsigned orig_cached_matches_pos;
2159 struct lzx_lru_queue orig_queue, nonsplit_queue;
2160 struct lzx_codes *prev_codes = *prev_codes_p;
2162 LZX_DEBUG("Preparing block %u...", block_number);
2164 /* Save positions of chosen and cached matches, and the match offset LRU
2165 * queue, so that they can be restored if splitting is attempted. */
2166 orig_cached_matches_pos = ctx->cached_matches_pos;
2167 orig_queue = ctx->queue;
2169 /* Consider outputting the input subsequence as a single block. */
2171 cost = lzx_prepare_compressed_block(ctx, block_number, prev_codes);
/* Remember the LRU queue state after the unsplit encoding so it can be
 * restored if the split turns out to be more expensive. */
2172 nonsplit_queue = ctx->queue;
2174 *prev_codes_p = &spec->codes;
2176 /* If the maximum split level is at least one, consider splitting the
2178 if (max_split_level--) {
2180 LZX_DEBUG("Calculating split of block %u...", block_number);
2182 struct lzx_block_spec *spec1, *spec2;
2183 unsigned split_cost;
/* Rewind the match-finding state so both halves re-choose their
 * matches from the same starting conditions as the unsplit block. */
2185 ctx->cached_matches_pos = orig_cached_matches_pos;
2186 ctx->queue = orig_queue;
2188 /* Prepare and get the cost of the first sub-block. */
2189 spec1 = &ctx->block_specs[block_number * 2 - 1];
2190 spec1->codes.lens = spec->codes.lens;
2191 spec1->window_pos = spec->window_pos;
2192 spec1->block_size = spec->block_size / 2;
/* Each split level writes its matches into its own
 * LZX_MAX_WINDOW_SIZE-sized region of ctx->chosen_matches, so the
 * parent's saved matches are not clobbered. */
2193 spec1->chosen_matches_start_pos = spec->chosen_matches_start_pos +
2194 LZX_MAX_WINDOW_SIZE;
2195 split_cost = lzx_prepare_block_recursive(ctx,
2200 /* Prepare and get the cost of the second sub-block. */
2202 spec2->codes.lens = spec->codes.lens;
/* The second half covers whatever the first half did not,
 * including the odd byte when block_size is odd. */
2203 spec2->window_pos = spec->window_pos + spec1->block_size;
2204 spec2->block_size = spec->block_size - spec1->block_size;
2205 spec2->chosen_matches_start_pos = spec1->chosen_matches_start_pos +
2207 split_cost += lzx_prepare_block_recursive(ctx,
2208 block_number * 2 + 1,
2212 /* Compare the cost of the whole block with that of the split
2213 * block. Choose the lower cost solution. */
2214 if (split_cost < cost) {
2215 LZX_DEBUG("Splitting block %u is worth it "
2216 "(%u => %u bytes).",
2217 block_number, cost / 8, split_cost / 8);
/* Keep the split: the caller's previous-codes pointer must
 * refer back to the codes that were in effect before this
 * (now discarded) unsplit block. */
2220 *prev_codes_p = prev_codes;
2222 LZX_DEBUG("Splitting block %u is NOT worth it "
2223 "(%u => %u bytes).",
2224 block_number, cost / 8, split_cost / 8);
/* Keep the unsplit block: restore the LRU queue state that the
 * unsplit encoding left behind. */
2225 ctx->queue = nonsplit_queue;
2232 /* Empirical averages */
/* Default bit cost assigned to each main-alphabet symbol (literals and
 * match headers) before any real statistics exist for a block; derived
 * empirically from typical data, per the comment above. */
2233 static const u8 lzx_default_mainsym_costs[LZX_MAINTREE_NUM_SYMBOLS] = {
2234 7, 9, 9, 10, 9, 10, 10, 10, 9, 10, 9, 10, 10, 9, 10, 10, 9, 10, 10, 11,
2235 10, 10, 10, 11, 10, 11, 11, 11, 10, 11, 11, 11, 8, 11, 9, 10, 9, 10, 11,
2236 11, 9, 9, 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 9, 9, 9, 8, 8, 9, 9, 9, 9,
2237 10, 10, 10, 8, 9, 8, 8, 8, 8, 9, 9, 9, 10, 10, 8, 8, 9, 9, 8, 10, 9, 8,
2238 8, 9, 8, 9, 9, 10, 10, 10, 9, 10, 11, 9, 10, 8, 9, 8, 8, 8, 8, 9, 8, 8,
2239 9, 9, 8, 8, 8, 8, 8, 10, 8, 8, 7, 8, 9, 9, 9, 9, 10, 11, 10, 10, 11, 11,
2240 10, 11, 11, 10, 10, 11, 11, 11, 10, 10, 11, 10, 11, 10, 11, 11, 10, 11,
2241 11, 12, 11, 11, 11, 12, 11, 11, 11, 11, 11, 11, 11, 12, 10, 11, 11, 11,
2242 11, 11, 11, 12, 11, 11, 11, 11, 11, 12, 11, 11, 10, 11, 11, 11, 11, 11,
2243 11, 11, 10, 11, 11, 11, 11, 11, 11, 11, 10, 11, 11, 11, 11, 11, 11, 11,
2244 10, 11, 11, 11, 11, 11, 11, 11, 10, 11, 11, 11, 11, 12, 11, 11, 10, 11,
2245 11, 11, 11, 12, 11, 11, 10, 11, 11, 11, 10, 12, 11, 11, 10, 10, 11, 10,
2246 10, 11, 11, 11, 10, 11, 11, 11, 10, 11, 11, 11, 10, 11, 11, 11, 10, 11,
2247 10, 9, 8, 7, 10, 10, 11, 10, 11, 7, 9, 9, 11, 11, 11, 12, 11, 9, 10, 10,
2248 12, 12, 13, 13, 12, 11, 10, 12, 12, 14, 14, 14, 13, 12, 9, 12, 13, 14,
2249 14, 14, 14, 14, 9, 10, 13, 14, 14, 14, 14, 14, 9, 9, 11, 11, 13, 13, 13,
2250 14, 9, 9, 11, 12, 12, 13, 13, 13, 8, 8, 11, 11, 12, 12, 12, 11, 9, 9,
2251 10, 11, 12, 12, 12, 11, 8, 9, 10, 10, 11, 12, 11, 10, 9, 9, 10, 11, 11,
2252 12, 11, 10, 8, 9, 10, 10, 11, 11, 11, 9, 9, 9, 10, 11, 11, 11, 11, 9, 8,
2253 8, 10, 10, 11, 11, 11, 9, 9, 9, 10, 10, 11, 11, 11, 9, 9, 8, 9, 10, 11,
2254 11, 11, 9, 10, 9, 10, 11, 11, 11, 11, 9, 14, 9, 9, 10, 10, 11, 10, 9,
2255 14, 9, 10, 11, 11, 11, 11, 9, 14, 9, 10, 10, 11, 11, 11, 9, 14, 10, 10,
2256 11, 11, 12, 11, 10, 14, 10, 10, 10, 11, 11, 11, 10, 14, 11, 11, 11, 11,
2257 12, 12, 10, 14, 10, 11, 11, 11, 12, 11, 10, 14, 11, 11, 11, 12, 12, 12,
2258 11, 15, 11, 11, 11, 12, 12, 12, 11, 14, 12, 12, 12, 12, 13, 12, 11, 15,
2259 12, 12, 12, 13, 13, 13, 12, 15, 14, 13, 14, 14, 14, 14, 13,
2262 /* Empirical averages */
/* Default bit cost for each length-alphabet symbol, used as the initial
 * cost model before per-block statistics are gathered; short lengths are
 * assumed far more frequent (lower cost) than long ones. */
2263 static const u8 lzx_default_lensym_costs[LZX_LENTREE_NUM_SYMBOLS] = {
2264 5, 5, 5, 5, 5, 6, 5, 5, 6, 7, 7, 7, 8, 8, 7, 8, 9, 9, 9, 9, 10, 9, 9,
2265 10, 9, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12, 11, 12, 12,
2266 12, 12, 12, 12, 13, 12, 12, 12, 13, 12, 13, 13, 12, 12, 13, 12, 13, 13,
2267 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 13, 14, 13, 14, 13,
2268 14, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2269 14, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2270 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2271 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2272 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2273 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2274 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2275 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2276 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2277 14, 14, 14, 14, 14, 14, 14, 14, 14, 10,
2281 * Set default symbol costs.
2284 lzx_set_default_costs(struct lzx_lens * lens)
/* Either copy the empirical cost tables above into @lens, or (in the
 * #else branch) fill in flat per-symbol-class defaults. */
2288 #if LZX_PARAM_USE_EMPIRICAL_DEFAULT_COSTS
2289 memcpy(&lens->main, lzx_default_mainsym_costs, LZX_MAINTREE_NUM_SYMBOLS);
2290 memcpy(&lens->len, lzx_default_lensym_costs, LZX_LENTREE_NUM_SYMBOLS);
2293 /* Literal symbols */
2294 for (i = 0; i < LZX_NUM_CHARS; i++)
2297 /* Match header symbols */
2298 for (; i < LZX_MAINTREE_NUM_SYMBOLS; i++)
2301 /* Length symbols */
2302 for (i = 0; i < LZX_LENTREE_NUM_SYMBOLS; i++)
2306 /* Aligned offset symbols */
/* 3 bits = the uniform cost of an aligned-offset symbol (the aligned
 * tree has 8 symbols), apparently used regardless of which branch of
 * the #if above was taken. */
2307 for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++)
2308 lens->aligned[i] = 3;
2312 * lzx_prepare_blocks() -
2314 * Calculate the blocks to split the preprocessed data into.
2316 * Input --- the preprocessed data:
2325 * ctx->cached_matches
2326 * ctx->cached_matches_pos
2327 * ctx->matches_already_found
2329 * Block cost modeling:
2331 * ctx->block_specs (also an output)
2335 * ctx->optimum_cur_idx
2336 * ctx->optimum_end_idx
2337 * ctx->chosen_matches (also an output)
2339 * Output --- the block specifications and the corresponding match/literal data:
2341 * ctx->block_specs[]
2342 * ctx->chosen_matches[]
2344 * The return value is the approximate number of bits the compressed data will
2348 lzx_prepare_blocks(struct lzx_compressor * ctx)
2350 /* This function merely does some initializations, then passes control
2351 * to lzx_prepare_block_recursive(). */
2353 /* 1. Initialize match-finding variables. */
2355 /* Zero all entries in the hash table, indicating that no length-3
2356 * character sequences have been discovered in the input yet. */
/* NOTE(review): this clears LZX_LZ_HASH_SIZE * 2 entries although the
 * comment says only the hash table needs clearing and ctx->child_tab
 * (allocated directly after it) "need not be initialized" — verify
 * whether the factor of 2 is intentional. */
2357 memset(ctx->hash_tab, 0, LZX_LZ_HASH_SIZE * 2 * sizeof(ctx->hash_tab[0]));
2358 if (ctx->params.slow.use_len2_matches)
2359 memset(ctx->digram_tab, 0, 256 * 256 * sizeof(ctx->digram_tab[0]));
2360 /* Note: ctx->child_tab need not be initialized. */
2362 /* No matches have been found and cached yet. */
2363 ctx->cached_matches_pos = 0;
2364 ctx->matches_already_found = false;
2366 /* 2. Initialize match-choosing variables. */
2367 ctx->optimum_cur_idx = 0;
2368 ctx->optimum_end_idx = 0;
2369 /* Note: ctx->optimum need not be initialized. */
2370 ctx->block_specs[0].chosen_matches_start_pos = 0;
2372 /* 3. Set block 1 (index 0) to represent the entire input data. */
2373 ctx->block_specs[0].block_size = ctx->window_size;
2374 ctx->block_specs[0].window_pos = 0;
2376 /* 4. Set up a default Huffman symbol cost model for block 1 (index 0).
2377 * The model will be refined later. */
2378 lzx_set_default_costs(&ctx->block_specs[0].codes.lens);
2380 /* 5. Initialize the match offset LRU queue. */
/* All three recent match offsets start at 1. */
2381 ctx->queue = (struct lzx_lru_queue){1, 1, 1};
2383 /* 6. Pass control to recursive procedure. */
/* zero_codes serves as the "previous codes" for the very first block. */
2384 struct lzx_codes * prev_codes = &ctx->zero_codes;
2385 return lzx_prepare_block_recursive(ctx, 1,
2386 ctx->params.slow.num_split_passes,
2391 * This is the fast version of lzx_prepare_blocks(), which "quickly" prepares a
2392 * single compressed block containing the entire input. See the description of
2393 * the "Fast algorithm" at the beginning of this file for more information.
2395 * Input --- the preprocessed data:
2403 * Output --- the block specifications and the corresponding match/literal data:
2405 * ctx->block_specs[]
2406 * ctx->chosen_matches[]
2409 lzx_prepare_block_fast(struct lzx_compressor * ctx)
2411 unsigned num_matches;
2412 struct lzx_freqs freqs;
2413 struct lzx_block_spec *spec;
2415 /* Parameters to hash chain LZ match finder */
2416 static const struct lz_params lzx_lz_params = {
2417 /* LZX_MIN_MATCH == 2, but 2-character matches are rarely
2418 * useful; the minimum match for compression is set to 3
2421 .max_match = LZX_MAX_MATCH,
2422 .good_match = LZX_MAX_MATCH,
2423 .nice_match = LZX_MAX_MATCH,
2424 .max_chain_len = LZX_MAX_MATCH,
2425 .max_lazy_match = LZX_MAX_MATCH,
2429 /* Initialize symbol frequencies and match offset LRU queue. */
2430 memset(&freqs, 0, sizeof(struct lzx_freqs));
/* All three recent match offsets start at 1. */
2431 ctx->queue = (struct lzx_lru_queue){ 1, 1, 1 };
2433 /* Determine series of matches/literals to output. */
/* NOTE(review): the matches are written directly into
 * ctx->chosen_matches via a u32* cast — assumes the intermediate
 * match format fits a u32; confirm against lz_analyze_block. */
2434 num_matches = lz_analyze_block(ctx->window,
2436 (u32*)ctx->chosen_matches,
2445 /* Set up block specification. */
/* The fast path always emits a single aligned-offset block covering
 * the whole window. */
2446 spec = &ctx->block_specs[0];
2448 spec->block_type = LZX_BLOCKTYPE_ALIGNED;
2449 spec->window_pos = 0;
2450 spec->block_size = ctx->window_size;
2451 spec->num_chosen_matches = num_matches;
2452 spec->chosen_matches_start_pos = 0;
2453 lzx_make_huffman_codes(&freqs, &spec->codes);
/* Translate one x86 CALL instruction's 32-bit little-endian relative
 * offset (at *call_insn_target) into the "absolute" form required by LZX
 * preprocessing, rewriting it in place.  @input_pos is the position of
 * the 0xe8 opcode byte; @file_size is the (fixed) translation file size.
 * Offsets outside [-input_pos, file_size) are left untouched. */
2457 do_call_insn_translation(u32 *call_insn_target, int input_pos,
2463 rel_offset = le32_to_cpu(*call_insn_target);
2464 if (rel_offset >= -input_pos && rel_offset < file_size) {
2465 if (rel_offset < file_size - input_pos) {
2466 /* "good translation" */
2467 abs_offset = rel_offset + input_pos;
2469 /* "compensating translation" */
2470 abs_offset = rel_offset - file_size;
2472 *call_insn_target = cpu_to_le32(abs_offset);
2476 /* This is the reverse of undo_call_insn_preprocessing() in lzx-decompress.c.
2477 * See the comment above that function for more information. */
2479 do_call_insn_preprocessing(u8 data[], int size)
/* Scan for 0xe8 (x86 CALL rel32) opcode bytes; the scan stops 10 bytes
 * short of the end so the 4-byte operand (and the post-call skip) stay
 * within bounds, matching the LZX preprocessing rule. */
2481 for (int i = 0; i < size - 10; i++) {
2482 if (data[i] == 0xe8) {
/* NOTE(review): this cast yields a potentially unaligned u32
 * pointer into the byte buffer — relies on the platform and
 * compiler tolerating unaligned u32 accesses; confirm. */
2483 do_call_insn_translation((u32*)&data[i + 1], i,
2484 LZX_WIM_MAGIC_FILESIZE);
2490 /* API function documented in wimlib.h */
2492 wimlib_lzx_compress2(const void * const restrict uncompressed_data,
2493 unsigned const uncompressed_len,
2494 void * const restrict compressed_data,
2495 struct wimlib_lzx_context * const restrict lzx_ctx)
2497 struct lzx_compressor *ctx = (struct lzx_compressor*)lzx_ctx;
2498 struct output_bitstream ostream;
2499 unsigned compressed_len;
/* Inputs under 100 bytes or over the 32768-byte window are not
 * compressed (the WIM LZX chunk size is 32768 bytes). */
2501 if (uncompressed_len < 100) {
2502 LZX_DEBUG("Too small to bother compressing.");
2506 if (uncompressed_len > 32768) {
2507 LZX_DEBUG("Only up to 32768 bytes of uncompressed data are supported.");
2511 wimlib_assert(lzx_ctx != NULL);
2513 LZX_DEBUG("Attempting to compress %u bytes...", uncompressed_len);
2515 /* The input data must be preprocessed. To avoid changing the original
2516 * input, copy it to a temporary buffer. */
/* NOTE(review): assumes ctx->window has at least 12 bytes of slack
 * past window_size (needed by the memset below) — confirm against
 * the window allocation. */
2517 memcpy(ctx->window, uncompressed_data, uncompressed_len);
2518 ctx->window_size = uncompressed_len;
2520 /* This line is unnecessary; it just avoids inconsequential accesses of
2521 * uninitialized memory that would show up in memory-checking tools such
2523 memset(&ctx->window[ctx->window_size], 0, 12);
2525 LZX_DEBUG("Preprocessing data...");
2527 /* Before doing any actual compression, do the call instruction (0xe8
2528 * byte) translation on the uncompressed data. */
2529 do_call_insn_preprocessing(ctx->window, ctx->window_size);
2531 LZX_DEBUG("Preparing blocks...");
2533 /* Prepare the compressed data. */
2534 if (ctx->params.algorithm == WIMLIB_LZX_ALGORITHM_FAST)
2535 lzx_prepare_block_fast(ctx);
2537 lzx_prepare_blocks(ctx);
2539 LZX_DEBUG("Writing compressed blocks...");
2541 /* Generate the compressed data. */
/* Cap the output at window_size - 1 bytes: compression is only worth
 * keeping if it actually shrinks the data, so a longer result is
 * detected as a flush failure below. */
2542 init_output_bitstream(&ostream, compressed_data, ctx->window_size - 1);
2543 lzx_write_all_blocks(ctx, &ostream);
2545 LZX_DEBUG("Flushing bitstream...");
2546 if (flush_output_bitstream(&ostream)) {
2547 /* If the bitstream cannot be flushed, then the output space was
2549 LZX_DEBUG("Data did not compress to less than original length!");
2553 /* Compute the length of the compressed data. */
2554 compressed_len = ostream.bit_output - (u8*)compressed_data;
2556 LZX_DEBUG("Done: compressed %u => %u bytes.",
2557 uncompressed_len, compressed_len);
/* Debug-only round-trip check: decompress the result and compare it
 * byte-for-byte against the original input. */
2559 #if defined(ENABLE_LZX_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION)
2560 /* Verify that we really get the same thing back when decompressing. */
/* NOTE(review): VLA of up to 32768 bytes on the stack — acceptable in
 * debug builds but worth confirming stack limits on all targets. */
2562 u8 buf[uncompressed_len];
2566 ret = wimlib_lzx_decompress(compressed_data, compressed_len,
2567 buf, uncompressed_len);
2569 ERROR("Failed to decompress data we "
2570 "compressed using LZX algorithm");
2576 const u8 * udata = uncompressed_data;
2577 for (i = 0; i < uncompressed_len; i++) {
2578 if (buf[i] != udata[i]) {
2580 ERROR("Data we compressed using LZX algorithm "
2581 "didn't decompress to original "
2582 "(difference at idx %u: c %#02x, u %#02x)",
2583 i, buf[i], udata[i]);
2592 return compressed_len;
/* Return nonzero iff the two parameter structures are bitwise identical,
 * in which case an already-allocated compression context can be reused.
 * NOTE(review): memcmp also compares padding bytes, so structures with
 * equal fields but different (uninitialized) padding would compare
 * unequal — harmless here, but worth knowing. */
2596 lzx_params_compatible(const struct wimlib_lzx_params *oldparams,
2597 const struct wimlib_lzx_params *newparams)
2599 return 0 == memcmp(oldparams, newparams, sizeof(struct wimlib_lzx_params));
2602 /* API function documented in wimlib.h */
2604 wimlib_lzx_alloc_context(const struct wimlib_lzx_params *params,
2605 struct wimlib_lzx_context **ctx_pp)
2608 LZX_DEBUG("Allocating LZX context...");
2610 struct lzx_compressor *ctx;
/* Built-in default parameter sets for the two algorithms; the slow
 * defaults are also used when @params is NULL. */
2612 static const struct wimlib_lzx_params fast_default = {
2613 .size_of_this = sizeof(struct wimlib_lzx_params),
2614 .algorithm = WIMLIB_LZX_ALGORITHM_FAST,
2619 static const struct wimlib_lzx_params slow_default = {
2620 .size_of_this = sizeof(struct wimlib_lzx_params),
2621 .algorithm = WIMLIB_LZX_ALGORITHM_SLOW,
2624 .use_len2_matches = 1,
2625 .num_fast_bytes = 32,
2626 .num_optim_passes = 3,
2627 .num_split_passes = 3,
2628 .main_nostat_cost = 15,
2629 .len_nostat_cost = 15,
2630 .aligned_nostat_cost = 7,
2634 if (params == NULL) {
2635 LZX_DEBUG("Using default algorithm and parameters.");
2636 params = &slow_default;
/* Validate the caller-supplied parameters before allocating anything. */
2639 if (params->algorithm != WIMLIB_LZX_ALGORITHM_SLOW &&
2640 params->algorithm != WIMLIB_LZX_ALGORITHM_FAST)
2642 LZX_DEBUG("Invalid algorithm.");
2643 return WIMLIB_ERR_INVALID_PARAM;
2646 if (params->use_defaults) {
2647 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW)
2648 params = &slow_default;
2650 params = &fast_default;
2653 if (params->size_of_this != sizeof(struct wimlib_lzx_params)) {
2654 LZX_DEBUG("Invalid parameter structure size!");
2655 return WIMLIB_ERR_INVALID_PARAM;
2658 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
2659 if (params->slow.num_fast_bytes < 3 ||
2660 params->slow.num_fast_bytes > 257)
2662 LZX_DEBUG("Invalid number of fast bytes!");
2663 return WIMLIB_ERR_INVALID_PARAM;
2666 if (params->slow.num_optim_passes < 1)
2668 LZX_DEBUG("Invalid number of optimization passes!");
2669 return WIMLIB_ERR_INVALID_PARAM;
2672 if (params->slow.main_nostat_cost < 1 ||
2673 params->slow.main_nostat_cost > 16)
2675 LZX_DEBUG("Invalid main_nostat_cost!");
2676 return WIMLIB_ERR_INVALID_PARAM;
2679 if (params->slow.len_nostat_cost < 1 ||
2680 params->slow.len_nostat_cost > 16)
2682 LZX_DEBUG("Invalid len_nostat_cost!");
2683 return WIMLIB_ERR_INVALID_PARAM;
2686 if (params->slow.aligned_nostat_cost < 1 ||
2687 params->slow.aligned_nostat_cost > 8)
2689 LZX_DEBUG("Invalid aligned_nostat_cost!");
2690 return WIMLIB_ERR_INVALID_PARAM;
/* A NULL context pointer means the caller only wants the parameters
 * validated, not an allocation. */
2694 if (ctx_pp == NULL) {
2695 LZX_DEBUG("Check parameters only.");
/* Reuse an existing context when its parameters match exactly. */
2699 ctx = *(struct lzx_compressor**)ctx_pp;
2701 if (ctx && lzx_params_compatible(&ctx->params, params))
2704 LZX_DEBUG("Allocating memory.");
2706 ctx = MALLOC(sizeof(struct lzx_compressor));
/* Recursive splitting builds a full binary tree of blocks:
 * 2^(num_split_passes + 1) - 1 nodes. */
2710 size_t block_specs_length;
2712 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW)
2713 block_specs_length = ((1 << (params->slow.num_split_passes + 1)) - 1);
2715 block_specs_length = 1;
2716 ctx->block_specs = MALLOC(block_specs_length * sizeof(ctx->block_specs[0]));
2717 if (ctx->block_specs == NULL)
/* Hash table and child table share one allocation; child_tab starts
 * LZX_LZ_HASH_SIZE entries in. */
2720 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
2721 ctx->hash_tab = MALLOC((LZX_LZ_HASH_SIZE + 2 * LZX_MAX_WINDOW_SIZE) *
2722 sizeof(ctx->hash_tab[0]));
2723 if (ctx->hash_tab == NULL)
2724 goto err_free_block_specs;
2725 ctx->child_tab = ctx->hash_tab + LZX_LZ_HASH_SIZE;
2727 ctx->hash_tab = NULL;
2728 ctx->child_tab = NULL;
2731 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW &&
2732 params->slow.use_len2_matches)
2734 ctx->digram_tab = MALLOC(256 * 256 * sizeof(ctx->digram_tab[0]));
2735 if (ctx->digram_tab == NULL)
2736 goto err_free_hash_tab;
2738 ctx->digram_tab = NULL;
/* NOTE(review): 10 * LZX_MAX_WINDOW_SIZE presumably allows ~10 cached
 * matches per window position on average — confirm against the match
 * caching code. */
2741 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
2742 ctx->cached_matches = MALLOC(10 * LZX_MAX_WINDOW_SIZE *
2743 sizeof(ctx->cached_matches[0]));
2744 if (ctx->cached_matches == NULL)
2745 goto err_free_digram_tab;
2747 ctx->cached_matches = NULL;
2750 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
2751 ctx->optimum = MALLOC((LZX_PARAM_OPTIM_ARRAY_SIZE + LZX_MAX_MATCH) *
2752 sizeof(ctx->optimum[0]));
2753 if (ctx->optimum == NULL)
2754 goto err_free_cached_matches;
2756 ctx->optimum = NULL;
/* One LZX_MAX_WINDOW_SIZE-sized region of chosen matches per split
 * level (see chosen_matches_start_pos in the recursive splitter). */
2759 size_t chosen_matches_length;
2760 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW)
2761 chosen_matches_length = LZX_MAX_WINDOW_SIZE *
2762 (params->slow.num_split_passes + 1);
2764 chosen_matches_length = LZX_MAX_WINDOW_SIZE;
2766 ctx->chosen_matches = MALLOC(chosen_matches_length *
2767 sizeof(ctx->chosen_matches[0]));
2768 if (ctx->chosen_matches == NULL)
2769 goto err_free_optimum;
2771 memcpy(&ctx->params, params, sizeof(struct wimlib_lzx_params));
/* zero_codes is the all-zero "previous codes" used for the first block. */
2772 memset(&ctx->zero_codes, 0, sizeof(ctx->zero_codes));
2774 LZX_DEBUG("Successfully allocated new LZX context.");
/* Replace any old context the caller held. */
2776 wimlib_lzx_free_context(*ctx_pp);
2777 *ctx_pp = (struct wimlib_lzx_context*)ctx;
/* Unwind partial allocations in reverse order on failure. */
2782 err_free_cached_matches:
2783 FREE(ctx->cached_matches);
2784 err_free_digram_tab:
2785 FREE(ctx->digram_tab);
2787 FREE(ctx->hash_tab);
2788 err_free_block_specs:
2789 FREE(ctx->block_specs);
2793 LZX_DEBUG("Ran out of memory.");
2794 return WIMLIB_ERR_NOMEM;
2797 /* API function documented in wimlib.h */
/* Free a compression context and all of its sub-allocations.  Unused
 * members are NULL (set during allocation), which FREE presumably
 * tolerates like free(NULL). */
2799 wimlib_lzx_free_context(struct wimlib_lzx_context *_ctx)
2801 struct lzx_compressor *ctx = (struct lzx_compressor*)_ctx;
2804 FREE(ctx->chosen_matches);
2806 FREE(ctx->cached_matches);
2807 FREE(ctx->digram_tab);
/* child_tab shares hash_tab's allocation, so it is not freed separately. */
2808 FREE(ctx->hash_tab);
2809 FREE(ctx->block_specs);
2814 /* API function documented in wimlib.h */
2816 wimlib_lzx_compress(const void * const restrict uncompressed_data,
2817 unsigned const uncompressed_len,
2818 void * const restrict compressed_data)
2821 struct wimlib_lzx_context *ctx;
2822 unsigned compressed_len;
2824 ret = wimlib_lzx_alloc_context(NULL, &ctx);
2826 wimlib_assert(ret != WIMLIB_ERR_INVALID_PARAM);
2827 WARNING("Couldn't allocate LZX compression context: %"TS"",
2828 wimlib_get_error_string(ret));
2832 compressed_len = wimlib_lzx_compress2(uncompressed_data,
2837 wimlib_lzx_free_context(ctx);
2839 return compressed_len;