4 * LZX compression routines
8 * Copyright (C) 2012, 2013 Eric Biggers
10 * This file is part of wimlib, a library for working with WIM files.
12 * wimlib is free software; you can redistribute it and/or modify it under the
13 * terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 3 of the License, or (at your option)
17 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19 * A PARTICULAR PURPOSE. See the GNU General Public License for more
22 * You should have received a copy of the GNU General Public License
23 * along with wimlib; if not, see http://www.gnu.org/licenses/.
28 * This file contains a compressor for the LZX compression format, as used in
29 * the WIM file format.
34 * First, the primary reference for the LZX compression format is the
35 * specification released by Microsoft.
37 * Second, the comments in lzx-decompress.c provide some more information about
38 * the LZX compression format, including errors in the Microsoft specification.
40 * Do note that LZX shares many similarities with DEFLATE, the algorithm used by
41 * zlib and gzip. Both LZX and DEFLATE use LZ77 matching and Huffman coding,
42 * and certain other details are quite similar, such as the method for storing
43 * Huffman codes. However, some of the main differences are:
45 * - LZX preprocesses the data before attempting to compress it.
46 * - LZX uses a "main" alphabet which combines literals and matches, with the
47 * match symbols containing a "length header" (giving all or part of the match
48 * length) and a "position footer" (giving, roughly speaking, the order of
49 * magnitude of the match offset).
50 * - LZX does not have static Huffman blocks; however it does have two types of
51 * dynamic Huffman blocks ("aligned offset" and "verbatim").
52 * - LZX has a minimum match length of 2 rather than 3.
53 * - In LZX, match offsets 0 through 2 actually represent entries in a LRU queue
59 * There are actually two distinct overall algorithms implemented here. We
60 * shall refer to them as the "slow" algorithm and the "fast" algorithm. The
61 * "slow" algorithm spends more time compressing to achieve a higher compression
62 * ratio compared to the "fast" algorithm. More details are presented below.
67 * The "slow" algorithm to generate LZX-compressed data is roughly as follows:
69 * 1. Preprocess the input data to translate the targets of x86 call instructions
70 * to absolute offsets.
72 * 2. Determine the best known sequence of LZ77 matches ((offset, length) pairs)
73 * and literal bytes to divide the input into. Raw match-finding is done
74 * using a very clever binary tree search based on the "Bt3" algorithm from
75 * 7-Zip. Parsing, or match-choosing, is solved essentially as a
76 * minimum-cost path problem, but using a heuristic forward search based on
77 * the Deflate encoder from 7-Zip rather than a more intuitive backward
78 * search, the latter of which would naively require that all matches be
79 * found. This heuristic search, as well as other heuristics such as limits
80 * on the matches considered, considerably speed up this part of the
81 * algorithm, which is the main bottleneck. Finally, after matches and
 * literals are chosen, the Huffman codes needed to output them are
85 * 3. Up to a certain number of iterations, use the resulting Huffman codes to
86 * refine a cost model and go back to Step #2 to determine an improved
87 * sequence of matches and literals.
89 * 4. Up to a certain depth, try splitting the current block to see if the
90 * compression ratio can be improved. This may be the case if parts of the
91 * input differ greatly from each other and could benefit from different
94 * 5. Output the resulting block(s) using the match/literal sequences and the
95 * Huffman codes that were computed for each block.
100 * The fast algorithm (and the only one available in wimlib v1.5.1 and earlier)
101 * spends much less time on the main bottlenecks of the compression process ---
102 * that is the match finding, match choosing, and block splitting. Matches are
103 * found and chosen with hash chains using a greedy parse with one position of
104 * look-ahead. No block splitting is done; only compressing the full input into
105 * an aligned offset block is considered.
110 * The old API (retained for backward compatibility) consists of just one function:
112 * wimlib_lzx_compress()
114 * The new compressor has more potential parameters and needs more memory, so
115 * the new API ties up memory allocations and compression parameters into a
118 * wimlib_lzx_alloc_context()
119 * wimlib_lzx_compress2()
120 * wimlib_lzx_free_context()
122 * Both wimlib_lzx_compress() and wimlib_lzx_compress2() are designed to
123 * compress an in-memory buffer of up to 32768 bytes. There is no sliding
124 * window. This is suitable for the WIM format, which uses fixed-size chunks
125 * that are seemingly always 32768 bytes. If needed, the compressor potentially
126 * could be extended to support a larger and/or sliding window.
128 * Both wimlib_lzx_compress() and wimlib_lzx_compress2() return 0 if the data
129 * could not be compressed to less than the size of the uncompressed data.
130 * Again, this is suitable for the WIM format, which stores such data chunks
133 * The functions in this API are exported from the library, although this is
134 * only in case other programs happen to have uses for it other than WIM
135 * reading/writing as already handled through the rest of the library.
140 * Acknowledgments to several other open-source projects that made it possible
141 * to implement this code:
143 * - 7-Zip (author: Igor Pavlov), for the binary tree match-finding
144 * algorithm, the heuristic near-optimal forward match-choosing
145 * algorithm, and the block splitting algorithm.
147 * - zlib (author: Jean-loup Gailly and Mark Adler), for the hash table
148 * match-finding algorithm.
150 * - lzx-compress (author: Matthew T. Russotto), on which some parts of this
151 * code were originally based.
159 #include "wimlib/compress.h"
160 #include "wimlib/error.h"
161 #include "wimlib/lzx.h"
162 #include "wimlib/util.h"
164 #ifdef ENABLE_LZX_DEBUG
165 # include <wimlib/decompress.h>
170 /* Experimental parameters not exposed through the API */
171 #define LZX_PARAM_OPTIM_ARRAY_SIZE 1024
172 #define LZX_PARAM_ACCOUNT_FOR_LRU 1
173 #define LZX_PARAM_DONT_SKIP_MATCHES 0
174 #define LZX_PARAM_USE_EMPIRICAL_DEFAULT_COSTS 1
176 /* Currently, this constant can't simply be changed because the code currently
177 * uses a static number of position slots (and may make other assumptions as
179 #define LZX_MAX_WINDOW_SIZE 32768
181 /* This may be WIM-specific */
182 #define LZX_DEFAULT_BLOCK_SIZE 32768
184 #define LZX_LZ_HASH_BITS 15
185 #define LZX_LZ_HASH_SIZE (1 << LZX_LZ_HASH_BITS)
186 #define LZX_LZ_HASH_MASK (LZX_LZ_HASH_SIZE - 1)
187 #define LZX_LZ_HASH_SHIFT 5
189 /* Codewords for the LZX main, length, and aligned offset Huffman codes */
190 struct lzx_codewords {
191 u16 main[LZX_MAINTREE_NUM_SYMBOLS];
192 u16 len[LZX_LENTREE_NUM_SYMBOLS];
193 u16 aligned[LZX_ALIGNEDTREE_NUM_SYMBOLS];
196 /* Lengths for the LZX main, length, and aligned offset Huffman codes */
198 u8 main[LZX_MAINTREE_NUM_SYMBOLS];
199 u8 len[LZX_LENTREE_NUM_SYMBOLS];
200 u8 aligned[LZX_ALIGNEDTREE_NUM_SYMBOLS];
203 /* The LZX main, length, and aligned offset Huffman codes */
205 struct lzx_codewords codewords;
206 struct lzx_lens lens;
209 /* Tables for tallying symbol frequencies in the three LZX alphabets */
211 freq_t main[LZX_MAINTREE_NUM_SYMBOLS];
212 freq_t len[LZX_LENTREE_NUM_SYMBOLS];
213 freq_t aligned[LZX_ALIGNEDTREE_NUM_SYMBOLS];
216 /* LZX intermediate match/literal format */
220 * 31 1 if a match, 0 if a literal.
222 * 30-25 position slot. This can be at most 50, so it will fit in 6
225 * 8-24 position footer. This is the offset of the real formatted
226 * offset from the position base. This can be at most 17 bits
227 * (since lzx_extra_bits[LZX_NUM_POSITION_SLOTS - 1] is 17).
229 * 0-7 length of match, minus 2. This can be at most
230 * (LZX_MAX_MATCH - 2) == 255, so it will fit in 8 bits. */
234 /* Raw LZ match/literal format: just a length and offset.
236 * The length is the number of bytes of the match, and the offset is the number
237 * of bytes back in the input the match is from the matched text.
239 * If @len < LZX_MIN_MATCH, then it's really just a literal byte and @offset is
246 /* Specification for a LZX block */
247 struct lzx_block_spec {
	/* Set to 1 if this block has been split (in two --- we only consider
250 * binary splits). In such cases the rest of the fields are
251 * unimportant, since the relevant information is rather in the
252 * structures for the sub-blocks. */
255 /* One of the LZX_BLOCKTYPE_* constants indicating which type of this
259 /* 0-based position in the window at which this block starts. */
262 /* The number of bytes of uncompressed data this block represents. */
265 /* The position in the 'chosen_matches' array in the `struct
266 * lzx_compressor' at which the match/literal specifications for
267 * this block begin. */
268 unsigned chosen_matches_start_pos;
270 /* The number of match/literal specifications for this block. */
271 u16 num_chosen_matches;
273 /* Huffman codes for this block. */
274 struct lzx_codes codes;
278 * An array of these structures is used during the match-choosing algorithm.
279 * They correspond to consecutive positions in the window and are used to keep
280 * track of the cost to reach each position, and the match/literal choices that
281 * need to be chosen to reach that position.
284 /* The approximate minimum cost, in bits, to reach this position in the
285 * window which has been found so far. */
288 /* The union here is just for clarity, since the fields are used in two
289 * slightly different ways. Initially, the @prev structure is filled in
290 * first, and links go from later in the window to earlier in the
291 * window. Later, @next structure is filled in and links go from
292 * earlier in the window to later in the window. */
295 /* Position of the start of the match or literal that
296 * was taken to get to this position in the approximate
297 * minimum-cost parse. */
300 /* Offset (as in a LZ (length, offset) pair) of the
301 * match or literal that was taken to get to this
302 * position in the approximate minimum-cost parse. */
306 /* Position at which the match or literal starting at
307 * this position ends in the minimum-cost parse. */
310 /* Offset (as in a LZ (length, offset) pair) of the
311 * match or literal starting at this position in the
312 * approximate minimum-cost parse. */
316 #if LZX_PARAM_ACCOUNT_FOR_LRU
317 struct lzx_lru_queue queue;
321 /* State of the LZX compressor */
322 struct lzx_compressor {
324 /* The parameters that were used to create the compressor. */
325 struct wimlib_lzx_params params;
327 /* The buffer of data to be compressed.
329 * 0xe8 byte preprocessing is done directly on the data here before
330 * further compression.
332 * Note that this compressor does *not* use a sliding window!!!!
333 * It's not needed in the WIM format, since every chunk is compressed
334 * independently. This is by design, to allow random access to the
337 * We reserve a few extra bytes to potentially allow reading off the end
338 * of the array in the match-finding code for optimization purposes.
340 u8 window[LZX_MAX_WINDOW_SIZE + 12];
342 /* Number of bytes of data to be compressed, which is the number of
343 * bytes of data in @window that are actually valid. */
344 unsigned window_size;
346 /* The current match offset LRU queue. */
347 struct lzx_lru_queue queue;
349 /* Space for sequence of matches/literals that were chosen.
351 * Each LZX_MAX_WINDOW_SIZE-sized portion of this array is used for a
352 * different block splitting level. */
353 struct lzx_match *chosen_matches;
355 /* Structures used during block splitting.
357 * This can be thought of as a binary tree. block_specs[(1) - 1]
358 * represents to the top-level block (root node), and block_specs[(i*2)
359 * - 1] and block_specs[(i*2+1) - 1] represent the sub-blocks (child
360 * nodes) resulting from a binary split of the block represented by
361 * block_spec[(i) - 1].
363 struct lzx_block_spec *block_specs;
365 /* This is simply filled in with zeroes and used to avoid special-casing
366 * the output of the first compressed Huffman code, which conceptually
367 * has a delta taken from a code with all symbols having zero-length
369 struct lzx_codes zero_codes;
371 /* Slow algorithm only: The current cost model. */
372 struct lzx_lens costs;
374 /* Slow algorithm only: Table that maps the hash codes for 3 character
375 * sequences to the most recent position that sequence (or a sequence
376 * sharing the same hash code) appeared in the window. */
379 /* Slow algorithm only: Table that maps 2-character sequences to the
380 * most recent position that sequence appeared in the window. */
383 /* Slow algorithm only: Table that contains the logical child pointers
384 * in the binary trees in the match-finding code.
386 * child_tab[i*2] and child_tab[i*2+1] are the left and right pointers,
387 * respectively, from the binary tree root corresponding to window
391 /* Slow algorithm only: Matches that were already found and are saved in
392 * memory for subsequent queries (e.g. when block splitting). */
393 struct raw_match *cached_matches;
395 /* Slow algorithm only: Next position in 'cached_matches' to either
396 * return or fill in. */
397 unsigned cached_matches_pos;
399 /* Slow algorithm only: %true if reading from 'cached_matches'; %false
400 * if writing to 'cached_matches'. */
401 bool matches_already_found;
403 /* Slow algorithm only: Position in window of next match to return. */
404 unsigned match_window_pos;
406 /* Slow algorithm only: No matches returned shall reach past this
408 unsigned match_window_end;
410 /* Slow algorithm only: Temporary space used for match-choosing
413 * The size of this array must be at least LZX_MAX_MATCH but otherwise
414 * is arbitrary. More space simply allows the match-choosing algorithm
415 * to find better matches (depending on the input, as always). */
416 struct lzx_optimal *optimum;
418 /* Slow algorithm only: Variables used by the match-choosing algorithm.
420 * When matches have been chosen, optimum_cur_idx is set to the position
421 * in the window of the next match/literal to return and optimum_end_idx
422 * is set to the position in the window at the end of the last
423 * match/literal to return. */
/* Returns the LZX position slot that corresponds to a given formatted offset.
 *
 * Logically, this returns the smallest i such that
 * formatted_offset >= lzx_position_base[i].
 *
 * The actual implementation below takes advantage of the regularity of the
 * numbers in the lzx_position_base array to calculate the slot directly from
 * the formatted offset without actually looking at the array. */
static unsigned
lzx_get_position_slot(unsigned formatted_offset)
{
#if 0
	/*
	 * Slots 36-49 (formatted_offset >= 262144) can be found by
	 * (formatted_offset/131072) + 34 == (formatted_offset >> 17) + 34;
	 * however, this check for formatted_offset >= 262144 is commented out
	 * because WIM chunks cannot be that large.
	 */
	if (formatted_offset >= 262144) {
		return (formatted_offset >> 17) + 34;
	} else
#endif
	{
		/* Note: this part here only works if:
		 *
		 *    2 <= formatted_offset < 655360
		 *
		 * It is < 655360 because the frequency of the position bases
		 * increases starting at the 655360 entry, and it is >= 2
		 * because the below calculation fails if the most significant
		 * bit is lower than the 2's place. */
		LZX_ASSERT(2 <= formatted_offset && formatted_offset < 655360);

		/* The slot is determined by the index of the most significant
		 * set bit and the bit immediately below it. */
		unsigned mssb_idx = bsr32(formatted_offset);
		return (mssb_idx << 1) |
			((formatted_offset >> (mssb_idx - 1)) & 1);
	}
}
467 /* Compute the hash code for the next 3-character sequence in the window. */
469 lzx_lz_compute_hash(const u8 *window)
474 hash <<= LZX_LZ_HASH_SHIFT;
476 hash <<= LZX_LZ_HASH_SHIFT;
478 return hash & LZX_LZ_HASH_MASK;
481 /* Build the main, length, and aligned offset Huffman codes used in LZX.
483 * This takes as input the frequency tables for each code and produces as output
484 * a set of tables that map symbols to codewords and lengths. */
486 lzx_make_huffman_codes(const struct lzx_freqs *freqs,
487 struct lzx_codes *codes)
489 make_canonical_huffman_code(LZX_MAINTREE_NUM_SYMBOLS,
490 LZX_MAX_CODEWORD_LEN,
493 codes->codewords.main);
495 make_canonical_huffman_code(LZX_LENTREE_NUM_SYMBOLS,
496 LZX_MAX_CODEWORD_LEN,
499 codes->codewords.len);
501 make_canonical_huffman_code(LZX_ALIGNEDTREE_NUM_SYMBOLS, 8,
504 codes->codewords.aligned);
508 * Output a LZX match.
510 * @out: The bitstream to write the match to.
511 * @block_type: The type of the LZX block (LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM)
513 * @codes: Pointer to a structure that contains the codewords for the
514 * main, length, and aligned offset Huffman codes.
517 lzx_write_match(struct output_bitstream *out, int block_type,
518 struct lzx_match match, const struct lzx_codes *codes)
520 /* low 8 bits are the match length minus 2 */
521 unsigned match_len_minus_2 = match.data & 0xff;
522 /* Next 17 bits are the position footer */
523 unsigned position_footer = (match.data >> 8) & 0x1ffff; /* 17 bits */
524 /* Next 6 bits are the position slot. */
525 unsigned position_slot = (match.data >> 25) & 0x3f; /* 6 bits */
528 unsigned len_pos_header;
529 unsigned main_symbol;
530 unsigned num_extra_bits;
531 unsigned verbatim_bits;
532 unsigned aligned_bits;
534 /* If the match length is less than MIN_MATCH (= 2) +
535 * NUM_PRIMARY_LENS (= 7), the length header contains
536 * the match length minus MIN_MATCH, and there is no
539 * Otherwise, the length header contains
540 * NUM_PRIMARY_LENS, and the length footer contains
541 * the match length minus NUM_PRIMARY_LENS minus
543 if (match_len_minus_2 < LZX_NUM_PRIMARY_LENS) {
544 len_header = match_len_minus_2;
545 /* No length footer-- mark it with a special
547 len_footer = (unsigned)(-1);
549 len_header = LZX_NUM_PRIMARY_LENS;
550 len_footer = match_len_minus_2 - LZX_NUM_PRIMARY_LENS;
553 /* Combine the position slot with the length header into
554 * a single symbol that will be encoded with the main
556 len_pos_header = (position_slot << 3) | len_header;
558 /* The actual main symbol is offset by LZX_NUM_CHARS because
559 * values under LZX_NUM_CHARS are used to indicate a literal
560 * byte rather than a match. */
561 main_symbol = len_pos_header + LZX_NUM_CHARS;
563 /* Output main symbol. */
564 bitstream_put_bits(out, codes->codewords.main[main_symbol],
565 codes->lens.main[main_symbol]);
567 /* If there is a length footer, output it using the
568 * length Huffman code. */
569 if (len_footer != (unsigned)(-1)) {
570 bitstream_put_bits(out, codes->codewords.len[len_footer],
571 codes->lens.len[len_footer]);
574 num_extra_bits = lzx_get_num_extra_bits(position_slot);
576 /* For aligned offset blocks with at least 3 extra bits, output the
577 * verbatim bits literally, then the aligned bits encoded using the
578 * aligned offset tree. Otherwise, only the verbatim bits need to be
580 if ((block_type == LZX_BLOCKTYPE_ALIGNED) && (num_extra_bits >= 3)) {
582 verbatim_bits = position_footer >> 3;
583 bitstream_put_bits(out, verbatim_bits,
586 aligned_bits = (position_footer & 7);
587 bitstream_put_bits(out,
588 codes->codewords.aligned[aligned_bits],
589 codes->lens.aligned[aligned_bits]);
591 /* verbatim bits is the same as the position
592 * footer, in this case. */
593 bitstream_put_bits(out, position_footer, num_extra_bits);
598 lzx_build_precode(const u8 lens[restrict],
599 const u8 prev_lens[restrict],
601 freq_t precode_freqs[restrict LZX_PRETREE_NUM_SYMBOLS],
602 u8 output_syms[restrict num_syms],
603 u8 precode_lens[restrict LZX_PRETREE_NUM_SYMBOLS],
604 u16 precode_codewords[restrict LZX_PRETREE_NUM_SYMBOLS],
605 unsigned * num_additional_bits_ret)
607 unsigned output_syms_idx;
608 unsigned cur_run_len;
611 unsigned additional_bits;
613 unsigned num_additional_bits = 0;
615 memset(precode_freqs, 0,
616 LZX_PRETREE_NUM_SYMBOLS * sizeof(precode_freqs[0]));
618 /* Since the code word lengths use a form of RLE encoding, the goal here
619 * is to find each run of identical lengths when going through them in
620 * symbol order (including runs of length 1). For each run, as many
621 * lengths are encoded using RLE as possible, and the rest are output
624 * output_syms[] will be filled in with the length symbols that will be
625 * output, including RLE codes, not yet encoded using the pre-tree.
627 * cur_run_len keeps track of how many code word lengths are in the
628 * current run of identical lengths.
632 for (i = 1; i <= num_syms; i++) {
634 if (i != num_syms && lens[i] == lens[i - 1]) {
635 /* Still in a run--- keep going. */
640 /* Run ended! Check if it is a run of zeroes or a run of
643 /* The symbol that was repeated in the run--- not to be confused
644 * with the length *of* the run (cur_run_len) */
645 len_in_run = lens[i - 1];
647 if (len_in_run == 0) {
648 /* A run of 0's. Encode it in as few length
649 * codes as we can. */
651 /* The magic length 18 indicates a run of 20 + n zeroes,
652 * where n is an uncompressed literal 5-bit integer that
653 * follows the magic length. */
654 while (cur_run_len >= 20) {
656 additional_bits = min(cur_run_len - 20, 0x1f);
657 num_additional_bits += 5;
659 output_syms[output_syms_idx++] = 18;
660 output_syms[output_syms_idx++] = additional_bits;
661 cur_run_len -= 20 + additional_bits;
664 /* The magic length 17 indicates a run of 4 + n zeroes,
665 * where n is an uncompressed literal 4-bit integer that
666 * follows the magic length. */
667 while (cur_run_len >= 4) {
668 additional_bits = min(cur_run_len - 4, 0xf);
669 num_additional_bits += 4;
671 output_syms[output_syms_idx++] = 17;
672 output_syms[output_syms_idx++] = additional_bits;
673 cur_run_len -= 4 + additional_bits;
678 /* A run of nonzero lengths. */
680 /* The magic length 19 indicates a run of 4 + n
681 * nonzeroes, where n is a literal bit that follows the
682 * magic length, and where the value of the lengths in
683 * the run is given by an extra length symbol, encoded
684 * with the precode, that follows the literal bit.
686 * The extra length symbol is encoded as a difference
687 * from the length of the codeword for the first symbol
688 * in the run in the previous tree.
690 while (cur_run_len >= 4) {
691 additional_bits = (cur_run_len > 4);
692 num_additional_bits += 1;
693 delta = (signed char)prev_lens[i - cur_run_len] -
694 (signed char)len_in_run;
698 precode_freqs[(unsigned char)delta]++;
699 output_syms[output_syms_idx++] = 19;
700 output_syms[output_syms_idx++] = additional_bits;
701 output_syms[output_syms_idx++] = delta;
702 cur_run_len -= 4 + additional_bits;
706 /* Any remaining lengths in the run are outputted without RLE,
707 * as a difference from the length of that codeword in the
709 while (cur_run_len > 0) {
710 delta = (signed char)prev_lens[i - cur_run_len] -
711 (signed char)len_in_run;
715 precode_freqs[(unsigned char)delta]++;
716 output_syms[output_syms_idx++] = delta;
723 /* Build the precode from the frequencies of the length symbols. */
725 make_canonical_huffman_code(LZX_PRETREE_NUM_SYMBOLS,
726 LZX_MAX_CODEWORD_LEN,
727 precode_freqs, precode_lens,
730 if (num_additional_bits_ret)
731 *num_additional_bits_ret = num_additional_bits;
733 return output_syms_idx;
737 * Writes a compressed Huffman code to the output, preceded by the precode for
740 * The Huffman code is represented in the output as a series of path lengths
741 * from which the canonical Huffman code can be reconstructed. The path lengths
742 * themselves are compressed using a separate Huffman code, the precode, which
743 * consists of LZX_PRETREE_NUM_SYMBOLS (= 20) symbols that cover all possible
744 * code lengths, plus extra codes for repeated lengths. The path lengths of the
745 * precode precede the path lengths of the larger code and are uncompressed,
746 * consisting of 20 entries of 4 bits each.
748 * @out: Bitstream to write the code to.
749 * @lens: The code lengths for the Huffman code, indexed by symbol.
750 * @prev_lens: Code lengths for this Huffman code, indexed by symbol,
751 * in the *previous block*, or all zeroes if this is the
753 * @num_syms: The number of symbols in the code.
756 lzx_write_compressed_code(struct output_bitstream *out,
757 const u8 lens[restrict],
758 const u8 prev_lens[restrict],
761 freq_t precode_freqs[LZX_PRETREE_NUM_SYMBOLS];
762 u8 output_syms[num_syms];
763 u8 precode_lens[LZX_PRETREE_NUM_SYMBOLS];
764 u16 precode_codewords[LZX_PRETREE_NUM_SYMBOLS];
766 unsigned num_output_syms;
769 num_output_syms = lzx_build_precode(lens,
778 /* Write the lengths of the precode codes to the output. */
779 for (i = 0; i < LZX_PRETREE_NUM_SYMBOLS; i++)
780 bitstream_put_bits(out, precode_lens[i],
781 LZX_PRETREE_ELEMENT_SIZE);
783 /* Write the length symbols, encoded with the precode, to the output. */
785 for (i = 0; i < num_output_syms; ) {
786 precode_sym = output_syms[i++];
788 bitstream_put_bits(out, precode_codewords[precode_sym],
789 precode_lens[precode_sym]);
790 switch (precode_sym) {
792 bitstream_put_bits(out, output_syms[i++], 4);
795 bitstream_put_bits(out, output_syms[i++], 5);
798 bitstream_put_bits(out, output_syms[i++], 1);
799 bitstream_put_bits(out,
800 precode_codewords[output_syms[i]],
801 precode_lens[output_syms[i]]);
811 * Writes all compressed matches and literal bytes in a LZX block to the the
815 * The output bitstream.
817 * The type of the block (LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM).
819 * The array of matches/literals that will be output (length @match_count).
821 * Number of matches/literals to be output.
823 * Pointer to a structure that contains the codewords for the main, length,
824 * and aligned offset Huffman codes.
827 lzx_write_matches_and_literals(struct output_bitstream *ostream,
829 const struct lzx_match match_tab[],
830 unsigned match_count,
831 const struct lzx_codes *codes)
833 for (unsigned i = 0; i < match_count; i++) {
834 struct lzx_match match = match_tab[i];
836 /* High bit of the match indicates whether the match is an
837 * actual match (1) or a literal uncompressed byte (0) */
838 if (match.data & 0x80000000) {
840 lzx_write_match(ostream, block_type,
844 bitstream_put_bits(ostream,
845 codes->codewords.main[match.data],
846 codes->lens.main[match.data]);
/* Debug-only sanity check on a set of LZX Huffman codes: verify that no
 * codeword length exceeds the maximum for its alphabet and that each set of
 * lengths forms a valid prefix code (by building a decode table from it).
 * Compiles to nothing unless ENABLE_LZX_DEBUG is defined. */
static void
lzx_assert_codes_valid(const struct lzx_codes * codes)
{
#ifdef ENABLE_LZX_DEBUG
	unsigned i;

	for (i = 0; i < LZX_MAINTREE_NUM_SYMBOLS; i++)
		LZX_ASSERT(codes->lens.main[i] <= LZX_MAX_CODEWORD_LEN);

	for (i = 0; i < LZX_LENTREE_NUM_SYMBOLS; i++)
		LZX_ASSERT(codes->lens.len[i] <= LZX_MAX_CODEWORD_LEN);

	/* The aligned offset code has a smaller maximum codeword length. */
	for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++)
		LZX_ASSERT(codes->lens.aligned[i] <= 8);

	/* Attempt to build a decode table from each set of lengths; this
	 * catches any set of lengths that is not a valid prefix code.
	 * NOTE(review): the argument lists below were reconstructed --- verify
	 * against the make_huffman_decode_table() prototype in decompress.h. */
	const unsigned tablebits = 10;
	u16 decode_table[(1 << tablebits) +
			 (2 * max(LZX_MAINTREE_NUM_SYMBOLS, LZX_LENTREE_NUM_SYMBOLS))]
			 _aligned_attribute(DECODE_TABLE_ALIGNMENT);
	LZX_ASSERT(0 == make_huffman_decode_table(decode_table,
						  LZX_MAINTREE_NUM_SYMBOLS,
						  tablebits,
						  codes->lens.main,
						  LZX_MAX_CODEWORD_LEN));
	LZX_ASSERT(0 == make_huffman_decode_table(decode_table,
						  LZX_LENTREE_NUM_SYMBOLS,
						  tablebits,
						  codes->lens.len,
						  LZX_MAX_CODEWORD_LEN));
	LZX_ASSERT(0 == make_huffman_decode_table(decode_table,
						  LZX_ALIGNEDTREE_NUM_SYMBOLS,
						  min(tablebits, 8),
						  codes->lens.aligned,
						  8));
#endif /* ENABLE_LZX_DEBUG */
}
889 /* Write a LZX aligned offset or verbatim block to the output. */
891 lzx_write_compressed_block(int block_type,
893 struct lzx_match * chosen_matches,
894 unsigned num_chosen_matches,
895 const struct lzx_codes * codes,
896 const struct lzx_codes * prev_codes,
897 struct output_bitstream * ostream)
901 LZX_ASSERT(block_type == LZX_BLOCKTYPE_ALIGNED ||
902 block_type == LZX_BLOCKTYPE_VERBATIM);
903 LZX_ASSERT(block_size <= LZX_MAX_WINDOW_SIZE);
904 LZX_ASSERT(num_chosen_matches <= LZX_MAX_WINDOW_SIZE);
905 lzx_assert_codes_valid(codes);
907 /* The first three bits indicate the type of block and are one of the
908 * LZX_BLOCKTYPE_* constants. */
909 bitstream_put_bits(ostream, block_type, LZX_BLOCKTYPE_NBITS);
911 /* The next bit indicates whether the block size is the default (32768),
912 * indicated by a 1 bit, or whether the block size is given by the next
913 * 16 bits, indicated by a 0 bit. */
914 if (block_size == LZX_DEFAULT_BLOCK_SIZE) {
915 bitstream_put_bits(ostream, 1, 1);
917 bitstream_put_bits(ostream, 0, 1);
918 bitstream_put_bits(ostream, block_size, LZX_BLOCKSIZE_NBITS);
921 /* Write out lengths of the main code. Note that the LZX specification
922 * incorrectly states that the aligned offset code comes after the
923 * length code, but in fact it is the very first tree to be written
924 * (before the main code). */
925 if (block_type == LZX_BLOCKTYPE_ALIGNED)
926 for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++)
927 bitstream_put_bits(ostream, codes->lens.aligned[i],
928 LZX_ALIGNEDTREE_ELEMENT_SIZE);
930 LZX_DEBUG("Writing main code...");
932 /* Write the pre-tree and lengths for the first LZX_NUM_CHARS symbols in
933 * the main code, which are the codewords for literal bytes. */
934 lzx_write_compressed_code(ostream,
936 prev_codes->lens.main,
939 /* Write the pre-tree and lengths for the rest of the main code, which
940 * are the codewords for match headers. */
941 lzx_write_compressed_code(ostream,
942 codes->lens.main + LZX_NUM_CHARS,
943 prev_codes->lens.main + LZX_NUM_CHARS,
944 LZX_MAINTREE_NUM_SYMBOLS - LZX_NUM_CHARS);
946 LZX_DEBUG("Writing length code...");
948 /* Write the pre-tree and lengths for the length code. */
949 lzx_write_compressed_code(ostream,
951 prev_codes->lens.len,
952 LZX_LENTREE_NUM_SYMBOLS);
954 LZX_DEBUG("Writing matches and literals...");
956 /* Write the actual matches and literals. */
957 lzx_write_matches_and_literals(ostream, block_type,
958 chosen_matches, num_chosen_matches,
961 LZX_DEBUG("Done writing block.");
964 /* Write the LZX block of index @block_number, or write its children recursively
965 * if it is a split block.
967 * @prev_codes is a pointer to the Huffman codes for the most recent block
968 * written, or all zeroes if this is the first block.
970 * Return a pointer to the Huffman codes for the last block written. */
971 static struct lzx_codes *
972 lzx_write_block_recursive(struct lzx_compressor *ctx,
973 unsigned block_number,
974 struct lzx_codes * prev_codes,
975 struct output_bitstream *ostream)
977 struct lzx_block_spec *spec = &ctx->block_specs[block_number - 1];
979 if (spec->is_split) {
980 prev_codes = lzx_write_block_recursive(ctx, block_number * 2 + 0,
981 prev_codes, ostream);
982 prev_codes = lzx_write_block_recursive(ctx, block_number * 2 + 1,
983 prev_codes, ostream);
985 LZX_DEBUG("Writing block #%u (type=%d, size=%u, num_chosen_matches=%u)...",
986 block_number, spec->block_type, spec->block_size,
987 spec->num_chosen_matches);
988 lzx_write_compressed_block(spec->block_type,
990 &ctx->chosen_matches[spec->chosen_matches_start_pos],
991 spec->num_chosen_matches,
995 prev_codes = &spec->codes;
1000 /* Write out the LZX blocks that were computed. */
1002 lzx_write_all_blocks(struct lzx_compressor *ctx, struct output_bitstream *ostream)
1004 lzx_write_block_recursive(ctx, 1, &ctx->zero_codes, ostream);
1008 lzx_record_literal(u8 literal, void *_freqs)
1010 struct lzx_freqs *freqs = _freqs;
1012 freqs->main[literal]++;
1014 return (u32)literal;
1017 /* Constructs a match from an offset and a length, and updates the LRU queue and
1018 * the frequency of symbols in the main, length, and aligned offset alphabets.
1019 * The return value is a 32-bit number that provides the match in an
1020 * intermediate representation documented below. */
1022 lzx_record_match(unsigned match_offset, unsigned match_len,
1023 void *_freqs, void *_queue)
1025 struct lzx_freqs *freqs = _freqs;
1026 struct lzx_lru_queue *queue = _queue;
1027 unsigned position_slot;
1028 unsigned position_footer = 0;
1031 unsigned len_footer;
1032 unsigned adjusted_match_len;
1034 LZX_ASSERT(match_len >= LZX_MIN_MATCH && match_len <= LZX_MAX_MATCH);
1036 /* If possible, encode this offset as a repeated offset. */
1037 if (match_offset == queue->R0) {
1039 } else if (match_offset == queue->R1) {
/* Repeat-offset hit on R1: LZX swaps R0 and R1 rather than
 * rotating the whole queue. */
1040 swap(queue->R0, queue->R1);
1042 } else if (match_offset == queue->R2) {
/* Repeat-offset hit on R2: swapped directly with R0. */
1043 swap(queue->R0, queue->R2);
1046 /* Not a repeated offset. */
1048 /* offsets of 0, 1, and 2 are reserved for the repeated offset
1049 * codes, so non-repeated offsets must be encoded as 3+. The
1050 * minimum offset is 1, so encode the offsets offset by 2. */
1051 unsigned formatted_offset = match_offset + 2;
/* Push the new offset onto the front of the LRU queue,
 * evicting the oldest entry (R2). */
1053 queue->R2 = queue->R1;
1054 queue->R1 = queue->R0;
1055 queue->R0 = match_offset;
1057 /* The (now-formatted) offset will actually be encoded as a
1058 * small position slot number that maps to a certain hard-coded
1059 * offset (position base), followed by a number of extra bits---
1060 * the position footer--- that are added to the position base to
1061 * get the original formatted offset. */
1063 position_slot = lzx_get_position_slot(formatted_offset);
1064 position_footer = formatted_offset &
1065 ((1 << lzx_get_num_extra_bits(position_slot)) - 1);
1068 adjusted_match_len = match_len - LZX_MIN_MATCH;
1071 /* The match length must be at least 2, so let the adjusted match length
1072 * be the match length minus 2.
1074 * If it is less than 7, the adjusted match length is encoded as a 3-bit
1075 * number offset by 2. Otherwise, the 3-bit length header is all 1's
1076 * and the actual adjusted length is given as a symbol encoded with the
1077 * length tree, offset by 7.
1079 if (adjusted_match_len < LZX_NUM_PRIMARY_LENS) {
1080 len_header = adjusted_match_len;
1082 len_header = LZX_NUM_PRIMARY_LENS;
1083 len_footer = adjusted_match_len - LZX_NUM_PRIMARY_LENS;
/* Long match: the excess length is coded by the length tree. */
1084 freqs->len[len_footer]++;
1086 len_pos_header = (position_slot << 3) | len_header;
/* Match symbols follow the LZX_NUM_CHARS literal symbols in the main
 * alphabet. */
1088 freqs->main[len_pos_header + LZX_NUM_CHARS]++;
1091 * if (lzx_extra_bits[position_slot] >= 3) */
1092 if (position_slot >= 8)
/* The low 3 footer bits are coded by the aligned-offset tree in
 * aligned blocks. */
1093 freqs->aligned[position_footer & 7]++;
1095 /* Pack the position slot, position footer, and match length into an
1096 * intermediate representation.
1099 * ---- -----------------------------------------------------------
1101 * 31 1 if a match, 0 if a literal.
1103 * 30-25 position slot. This can be at most 50, so it will fit in 6
1106 * 8-24 position footer. This is the offset of the real formatted
1107 * offset from the position base. This can be at most 17 bits
1108 * (since lzx_extra_bits[LZX_NUM_POSITION_SLOTS - 1] is 17).
1110 * 0-7 length of match, offset by 2. This can be at most
1111 * (LZX_MAX_MATCH - 2) == 255, so it will fit in 8 bits. */
1113 (position_slot << 25) |
1114 (position_footer << 8) |
1115 (adjusted_match_len);
1118 /* Set the cost model @ctx->costs from the Huffman codeword lengths specified in
1121 * These are basically the same thing, except that the Huffman codewords with
1122 * length 0 correspond to symbols with zero frequency that still need to be
1123 * assigned actual costs. The specific values assigned are arbitrary, but they
1124 * should be fairly high (near the maximum codeword length) to take into account
1125 * the fact that uses of these symbols are expected to be rare.
1128 lzx_set_costs(struct lzx_compressor * ctx, const struct lzx_lens * lens)
/* Start from the real codeword lengths... */
1132 memcpy(&ctx->costs, lens, sizeof(struct lzx_lens));
/* ...then replace each zero-length (unused) codeword with the
 * configurable fallback cost for its alphabet. */
1134 for (i = 0; i < LZX_MAINTREE_NUM_SYMBOLS; i++)
1135 if (ctx->costs.main[i] == 0)
1136 ctx->costs.main[i] = ctx->params.slow.main_nostat_cost;
1138 for (i = 0; i < LZX_LENTREE_NUM_SYMBOLS; i++)
1139 if (ctx->costs.len[i] == 0)
1140 ctx->costs.len[i] = ctx->params.slow.len_nostat_cost;
1142 for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++)
1143 if (ctx->costs.aligned[i] == 0)
1144 ctx->costs.aligned[i] = ctx->params.slow.aligned_nostat_cost;
/* Return the approximate cost, in bits, to output literal byte @c under the
 * cost model @costs (just the main-alphabet codeword length for @c). */
1148 lzx_literal_cost(u8 c, const struct lzx_lens * costs)
1150 return costs->main[c];
1153 /* Given a (length, offset) pair that could be turned into a valid LZX match as
1154 * well as costs for the codewords in the main, length, and aligned Huffman
1155 * codes, return the approximate number of bits it will take to represent this
1156 * match in the compressed output. */
1158 lzx_match_cost(unsigned length, unsigned offset, const struct lzx_lens *costs
1160 #if LZX_PARAM_ACCOUNT_FOR_LRU
1161 , struct lzx_lru_queue *queue
1165 unsigned position_slot, len_header, main_symbol;
1168 /* Calculate position slot and length header, then combine them into the
1171 #if LZX_PARAM_ACCOUNT_FOR_LRU
/* Mirror the repeat-offset handling of lzx_record_match() so the cost
 * reflects the cheap repeated-offset encodings; note this MUTATES
 * @queue the same way recording the match would. */
1172 if (offset == queue->R0) {
1174 } else if (offset == queue->R1) {
1175 swap(queue->R0, queue->R1);
1177 } else if (offset == queue->R2) {
1178 swap(queue->R0, queue->R2);
/* Non-repeated offset: formatted offset is offset + 2. */
1182 position_slot = lzx_get_position_slot(offset + 2);
1184 len_header = min(length - LZX_MIN_MATCH, LZX_NUM_PRIMARY_LENS);
1185 main_symbol = ((position_slot << 3) | len_header) + LZX_NUM_CHARS;
1187 /* Account for main symbol. */
1188 cost += costs->main[main_symbol];
1190 /* Account for extra position information. */
1191 unsigned num_extra_bits = lzx_get_num_extra_bits(position_slot);
1192 if (num_extra_bits >= 3) {
/* In aligned blocks the low 3 extra bits are Huffman-coded.
 * Since LZX_MIN_MATCH == 2 (see BUILD_BUG_ON elsewhere in this
 * file), (offset + LZX_MIN_MATCH) is the formatted offset, whose
 * low 3 bits equal (position_footer & 7) as tallied in
 * lzx_record_match(). */
1193 cost += num_extra_bits - 3;
1194 cost += costs->aligned[(offset + LZX_MIN_MATCH) & 7];
1196 cost += num_extra_bits;
1199 /* Account for extra length information. */
1200 if (length - LZX_MIN_MATCH >= LZX_NUM_PRIMARY_LENS)
1201 cost += costs->len[length - LZX_MIN_MATCH - LZX_NUM_PRIMARY_LENS];
1206 /* This procedure effectively creates a new binary tree corresponding to the
1207 * current string at the same time that it searches the existing tree nodes for
1208 * matches. This is the same algorithm as that used in GetMatchesSpec1() in
1209 * 7-Zip, but it is hopefully explained a little more clearly below. */
1211 lzx_lz_get_matches(const u8 window[restrict],
1212 const unsigned bytes_remaining,
1213 const unsigned strstart,
1214 const unsigned max_length,
1215 u16 child_tab[restrict],
1217 const unsigned prev_len,
1218 struct raw_match * const matches)
/* child_tab stores, for each window position, the roots of its
 * lexicographically-lesser and -greater subtrees at indices
 * 2*pos and 2*pos + 1 respectively. */
1220 u16 *new_tree_lt_ptr = &child_tab[strstart * 2];
1221 u16 *new_tree_gt_ptr = &child_tab[strstart * 2 + 1];
1223 u16 longest_lt_match_len = 0;
1224 u16 longest_gt_match_len = 0;
1226 /* Maximum number of nodes to walk down before stopping */
1227 unsigned depth = max_length;
1229 /* Length of longest match found so far */
1230 unsigned longest_match_len = prev_len;
1232 /* Maximum length of match to return */
1233 unsigned len_limit = min(bytes_remaining, max_length);
1235 /* Number of matches found so far */
1236 unsigned num_matches = 0;
1240 /* Stop if too many nodes were traversed or if there is no next
1242 if (depth-- == 0 || cur_match == 0) {
/* Terminate both subtrees of the node being built. */
1243 *new_tree_gt_ptr = 0;
1244 *new_tree_lt_ptr = 0;
1248 /* Load the pointers to the children of the binary tree node
1249 * corresponding to the current match */
1250 u16 * const cur_match_ptrs = &child_tab[cur_match * 2];
1252 /* Set up pointers to the current match and to the current
1254 const u8 * const matchptr = &window[cur_match];
1255 const u8 * const strptr = &window[strstart];
1257 /* Determine position at which to start comparing */
/* Both prior candidates agreed with the string for at least
 * min(lt, gt) bytes, so comparison can skip that prefix. */
1258 u16 len = min(longest_lt_match_len,
1259 longest_gt_match_len);
1261 if (matchptr[len] == strptr[len]) {
1263 /* Extend the match as far as possible. */
1264 while (++len != len_limit)
1265 if (matchptr[len] != strptr[len])
1268 /* Record this match if it is the longest found so far.
1270 if (len > longest_match_len) {
1271 longest_match_len = len;
1272 matches[num_matches].len = len;
1273 matches[num_matches].offset = strstart - cur_match;
1276 if (len == len_limit) {
1277 /* Length limit was reached. Link left pointer
1278 * in the new tree with left subtree of current
1279 * match tree, and link the right pointer in the
1280 * new tree with the right subtree of the
1281 * current match tree. This in effect deletes
1282 * the node for the current match, which is
1283 * desirable because the current match is the
1284 * same as the current string up until the
1285 * length limit, so in subsequent queries it
1286 * will never be preferable to the current
1288 *new_tree_lt_ptr = cur_match_ptrs[0];
1289 *new_tree_gt_ptr = cur_match_ptrs[1];
1295 if (matchptr[len] < strptr[len]) {
1296 /* Case 1: The current match is lexicographically less
1297 * than the current string.
1299 * Since we are searching the binary tree structures, we
1300 * need to walk down to the *right* subtree of the
1301 * current match's node to get to a match that is
1302 * lexicographically *greater* than the current match
1303 * but still lexicographically *lesser* than the current
1306 * At the same time, we link the entire binary tree
1307 * corresponding to the current match into the
1308 * appropriate place in the new binary tree being built
1309 * for the current string. */
1310 *new_tree_lt_ptr = cur_match;
1311 new_tree_lt_ptr = &cur_match_ptrs[1];
1312 cur_match = *new_tree_lt_ptr;
1313 longest_lt_match_len = len;
1315 /* Case 2: The current match is lexicographically
1316 * greater than the current string.
1318 * This is analogous to Case 1 above, but everything
1319 * happens in the other direction.
1321 *new_tree_gt_ptr = cur_match;
1322 new_tree_gt_ptr = &cur_match_ptrs[0];
1323 cur_match = *new_tree_gt_ptr;
1324 longest_gt_match_len = len;
1329 /* Equivalent to lzx_lz_get_matches(), but only updates the tree and doesn't
1330 * return matches. See that function for details (including comments). */
1332 lzx_lz_skip_matches(const u8 window[restrict],
1333 const unsigned bytes_remaining,
1334 const unsigned strstart,
1335 const unsigned max_length,
1336 u16 child_tab[restrict],
1338 const unsigned prev_len)
1340 u16 *new_tree_lt_ptr = &child_tab[strstart * 2];
1341 u16 *new_tree_gt_ptr = &child_tab[strstart * 2 + 1];
1343 u16 longest_lt_match_len = 0;
1344 u16 longest_gt_match_len = 0;
1346 unsigned depth = max_length;
1348 unsigned longest_match_len = prev_len;
1350 unsigned len_limit = min(bytes_remaining, max_length);
1353 if (depth-- == 0 || cur_match == 0) {
1354 *new_tree_gt_ptr = 0;
1355 *new_tree_lt_ptr = 0;
1359 u16 * const cur_match_ptrs = &child_tab[cur_match * 2];
1361 const u8 * const matchptr = &window[cur_match];
1362 const u8 * const strptr = &window[strstart];
1364 u16 len = min(longest_lt_match_len,
1365 longest_gt_match_len);
1367 if (matchptr[len] == strptr[len]) {
1368 while (++len != len_limit)
1369 if (matchptr[len] != strptr[len])
/* Track the longest length only; no match is recorded. */
1372 if (len > longest_match_len) {
1373 longest_match_len = len;
1375 if (len == len_limit) {
1376 *new_tree_lt_ptr = cur_match_ptrs[0];
1377 *new_tree_gt_ptr = cur_match_ptrs[1];
1383 if (matchptr[len] < strptr[len]) {
1384 *new_tree_lt_ptr = cur_match;
1385 new_tree_lt_ptr = &cur_match_ptrs[1];
1386 cur_match = *new_tree_lt_ptr;
1387 longest_lt_match_len = len;
1389 *new_tree_gt_ptr = cur_match;
1390 new_tree_gt_ptr = &cur_match_ptrs[0];
1391 cur_match = *new_tree_gt_ptr;
1392 longest_gt_match_len = len;
/* Forward declaration: needed because lzx_lz_skip_bytes() may call it under
 * LZX_PARAM_DONT_SKIP_MATCHES. */
1398 lzx_lz_get_matches_caching(struct lzx_compressor *ctx,
1399 struct raw_match **matches_ret);
1401 /* Tell the match-finder to skip the specified number of bytes (@n) in the
1404 lzx_lz_skip_bytes(struct lzx_compressor *ctx, unsigned n)
1407 #if LZX_PARAM_DONT_SKIP_MATCHES
1408 /* Option 1: Still cache the matches from the positions skipped. They
1409 * will then be available in later passes. */
1410 struct raw_match *matches;
1412 lzx_lz_get_matches_caching(ctx, &matches);
1414 /* Option 2: Mark the positions skipped as having no matches available,
1415 * but we still need to update the binary tree in case subsequent
1416 * positions have matches at the current position. */
1417 LZX_ASSERT(n <= ctx->match_window_end - ctx->match_window_pos);
1418 if (ctx->matches_already_found) {
/* A later pass: the cache already holds an entry for this
 * position; just step over it.  Cache layout: a header entry
 * whose .len is the match count, followed by that many matches. */
1420 LZX_ASSERT(ctx->cached_matches[ctx->cached_matches_pos].offset ==
1421 ctx->match_window_pos);
1422 ctx->cached_matches_pos += ctx->cached_matches[ctx->cached_matches_pos].len + 1;
1423 ctx->match_window_pos++;
/* First pass: keep the digram table, hash table, and binary tree
 * up to date for every skipped position, mirroring the updates in
 * lzx_lz_get_matches_caching(). */
1427 if (ctx->params.slow.use_len2_matches &&
1428 ctx->match_window_end - ctx->match_window_pos >= 2) {
1429 unsigned c1 = ctx->window[ctx->match_window_pos];
1430 unsigned c2 = ctx->window[ctx->match_window_pos + 1];
1431 unsigned digram = c1 | (c2 << 8);
1432 ctx->digram_tab[digram] = ctx->match_window_pos;
1434 if (ctx->match_window_end - ctx->match_window_pos >= 3) {
1438 hash = lzx_lz_compute_hash(&ctx->window[ctx->match_window_pos]);
1440 cur_match = ctx->hash_tab[hash];
1441 ctx->hash_tab[hash] = ctx->match_window_pos;
1443 lzx_lz_skip_matches(ctx->window,
1444 ctx->match_window_end - ctx->match_window_pos,
1445 ctx->match_window_pos,
1446 ctx->params.slow.num_fast_bytes,
/* Record "no matches" for this position in the cache. */
1450 ctx->cached_matches[ctx->cached_matches_pos].len = 0;
1451 ctx->cached_matches[ctx->cached_matches_pos].offset = ctx->match_window_pos;
1452 ctx->cached_matches_pos++;
1453 ctx->match_window_pos++;
1456 #endif /* !LZX_PARAM_DONT_SKIP_MATCHES */
1459 /* Retrieve a list of matches available at the next position in the input.
1461 * The return value is the number of matches found, and a pointer to them is
1462 * written to @matches_ret. The matches will be sorted in order by length.
1464 * This is essentially a wrapper around lzx_lz_get_matches() that caches its
1465 * output the first time and also performs the needed hashing.
1468 lzx_lz_get_matches_caching(struct lzx_compressor *ctx,
1469 struct raw_match **matches_ret)
1471 unsigned num_matches;
1472 struct raw_match *matches;
1474 LZX_ASSERT(ctx->match_window_end >= ctx->match_window_pos);
/* Matches for this position live just past the cache header entry. */
1476 matches = &ctx->cached_matches[ctx->cached_matches_pos + 1];
1478 if (ctx->matches_already_found) {
/* Later pass: reuse the cached matches.  The header entry's .len
 * holds the match count and .offset the window position. */
1479 num_matches = ctx->cached_matches[ctx->cached_matches_pos].len;
1480 LZX_ASSERT(ctx->cached_matches[ctx->cached_matches_pos].offset == ctx->match_window_pos);
/* Clamp cached match lengths to the (possibly smaller) current
 * match-finding region. */
1482 for (int i = (int)num_matches - 1; i >= 0; i--) {
1483 if (ctx->match_window_pos + matches[i].len > ctx->match_window_end)
1484 matches[i].len = ctx->match_window_end - ctx->match_window_pos;
/* First pass: actually run the match-finder and fill the cache. */
1489 unsigned prev_len = 1;
/* NOTE(review): this local shadows the @matches_ret parameter within
 * this branch — presumably intentional (it points into the cache like
 * `matches` above), but worth confirming/renaming for clarity. */
1490 struct raw_match * matches_ret = &ctx->cached_matches[ctx->cached_matches_pos + 1];
1493 if (ctx->params.slow.use_len2_matches &&
1494 ctx->match_window_end - ctx->match_window_pos >= 3) {
1495 unsigned c1 = ctx->window[ctx->match_window_pos];
1496 unsigned c2 = ctx->window[ctx->match_window_pos + 1];
1497 unsigned digram = c1 | (c2 << 8);
1500 cur_match = ctx->digram_tab[digram];
1501 ctx->digram_tab[digram] = ctx->match_window_pos;
/* Record a length-2 match from the digram table only when the
 * third byte differs; otherwise the hash-based finder below
 * would cover it with a length >= 3 match. */
1502 if (cur_match != 0 &&
1503 ctx->window[cur_match + 2] != ctx->window[ctx->match_window_pos + 2])
1505 matches_ret->len = 2;
1506 matches_ret->offset = ctx->match_window_pos - cur_match;
1512 if (ctx->match_window_end - ctx->match_window_pos >= 3) {
1516 hash = lzx_lz_compute_hash(&ctx->window[ctx->match_window_pos]);
1518 cur_match = ctx->hash_tab[hash];
1519 ctx->hash_tab[hash] = ctx->match_window_pos;
1520 num_matches += lzx_lz_get_matches(ctx->window,
1521 ctx->match_window_end - ctx->match_window_pos,
1522 ctx->match_window_pos,
1523 ctx->params.slow.num_fast_bytes,
/* Write the cache header entry for this position. */
1530 ctx->cached_matches[ctx->cached_matches_pos].len = num_matches;
1531 ctx->cached_matches[ctx->cached_matches_pos].offset = ctx->match_window_pos;
1534 struct raw_match *longest_match_ptr =
1535 &ctx->cached_matches[ctx->cached_matches_pos + 1 +
1537 u16 len = longest_match_ptr->len;
1539 /* If the longest match returned by the match-finder
1540 * reached the number of fast bytes, extend it as much
1542 if (len == ctx->params.slow.num_fast_bytes) {
1543 const unsigned maxlen =
1544 min(ctx->match_window_end - ctx->match_window_pos,
1547 const u8 * const matchptr =
1548 &ctx->window[ctx->match_window_pos - longest_match_ptr->offset];
1550 const u8 * const strptr =
1551 &ctx->window[ctx->match_window_pos];
1553 while (len < maxlen && matchptr[len] == strptr[len])
1556 longest_match_ptr->len = len;
/* Advance past the header entry plus the matches just cached. */
1559 ctx->cached_matches_pos += num_matches + 1;
1560 *matches_ret = matches;
1564 for (unsigned i = 0; i < num_matches; i++)
1566 printf("Len %u Offset %u\n", matches[i].len, matches[i].offset);
/* Debug-only sanity checks that each returned match really occurs in
 * the window at the claimed offset. */
1570 for (unsigned i = 0; i < num_matches; i++) {
1571 LZX_ASSERT(matches[i].len <= LZX_MAX_MATCH);
1572 if (matches[i].len >= LZX_MIN_MATCH) {
1573 LZX_ASSERT(matches[i].offset <= ctx->match_window_pos);
1574 LZX_ASSERT(matches[i].len <= ctx->match_window_end - ctx->match_window_pos);
1575 LZX_ASSERT(!memcmp(&ctx->window[ctx->match_window_pos],
1576 &ctx->window[ctx->match_window_pos - matches[i].offset],
1581 ctx->match_window_pos++;
1586 * Reverse the linked list of near-optimal matches so that they can be returned
1587 * in forwards order.
1589 * Returns the first match in the list.
1591 static struct raw_match
1592 lzx_lz_reverse_near_optimal_match_list(struct lzx_compressor *ctx,
1595 unsigned prev_link, saved_prev_link;
1596 unsigned prev_match_offset, saved_prev_match_offset;
1598 ctx->optimum_end_idx = cur_pos;
/* Walk the .prev back-links from cur_pos toward position 0, rewriting
 * each node's .next forward-link as we go.  Two saved copies are needed
 * because each node's .prev is overwritten conceptually one step after
 * it is read. */
1600 saved_prev_link = ctx->optimum[cur_pos].prev.link;
1601 saved_prev_match_offset = ctx->optimum[cur_pos].prev.match_offset;
1604 prev_link = saved_prev_link;
1605 prev_match_offset = saved_prev_match_offset;
1607 saved_prev_link = ctx->optimum[prev_link].prev.link;
1608 saved_prev_match_offset = ctx->optimum[prev_link].prev.match_offset;
1610 ctx->optimum[prev_link].next.link = cur_pos;
1611 ctx->optimum[prev_link].next.match_offset = prev_match_offset;
1613 cur_pos = prev_link;
1614 } while (cur_pos != 0);
1616 ctx->optimum_cur_idx = ctx->optimum[0].next.link;
/* The first step's length is simply the index it reaches from 0. */
1618 return (struct raw_match)
1619 { .len = ctx->optimum_cur_idx,
1620 .offset = ctx->optimum[0].next.match_offset,
1625 * lzx_lz_get_near_optimal_match() -
1627 * Choose the "best" match or literal to use at the next position in the input.
1629 * Unlike a "greedy" parser that always takes the longest match, or even a
1630 * parser with one match/literal look-ahead like zlib, the algorithm used here
1631 * may look ahead many matches/literals to determine the best match/literal to
1632 * output next. The motivation is that the compression ratio is improved if the
1633 * compressor can do things like use a shorter-than-possible match in order to
1634 * allow a longer match later, and also take into account the Huffman code cost
1635 * model rather than simply assuming that longer is better. It is not a true
1636 * "optimal" parser, however, since some shortcuts can be taken; for example, if
1637 * a match is very long, the parser just chooses it immediately before too much
1638 * time is wasted considering many different alternatives that are unlikely to
1641 * This algorithm is based on that used in 7-Zip's DEFLATE encoder.
1643 * Each call to this function does one of two things:
1645 * 1. Build a near-optimal sequence of matches/literals, up to some point, that
1646 * will be returned by subsequent calls to this function, then return the
1651 * 2. Return the next match/literal previously computed by a call to this
1654 * This function relies on the following state in the compressor context:
1656 * ctx->window (read-only: preprocessed data being compressed)
1657 * ctx->cost (read-only: cost model to use)
1658 * ctx->optimum (internal state; leave uninitialized)
1659 * ctx->optimum_cur_idx (must set to 0 before first call)
1660 * ctx->optimum_end_idx (must set to 0 before first call)
1661 * ctx->hash_tab (must set to 0 before first call)
1662 * ctx->cached_matches (internal state; leave uninitialized)
1663 * ctx->cached_matches_pos (initialize to 0 before first call; save and
1664 * restore value if restarting parse from a
1666 * ctx->match_window_pos (must initialize to position of next match to
1667 * return; subsequent calls return subsequent
1669 * ctx->match_window_end (must initialize to limit of match-finding region;
1670 * subsequent calls use the same limit)
1672 * The return value is a (length, offset) pair specifying the match or literal
1673 * chosen. For literals, length is either 0 or 1 and offset is meaningless.
1675 static struct raw_match
1676 lzx_lz_get_near_optimal_match(struct lzx_compressor * ctx)
1679 /* Testing: literals only */
1680 ctx->match_window_pos++;
1681 return (struct raw_match) { .len = 0 };
1683 /* Testing: greedy parsing */
1684 struct raw_match *matches;
1685 unsigned num_matches;
1686 struct raw_match match = {.len = 0};
1688 num_matches = lzx_lz_get_matches_caching(ctx, &matches);
1690 match = matches[num_matches - 1];
1691 lzx_lz_skip_bytes(ctx, match.len - 1);
1695 unsigned num_possible_matches;
1696 struct raw_match *possible_matches;
1697 struct raw_match match;
1698 unsigned longest_match_len;
1699 unsigned len, match_idx;
1701 if (ctx->optimum_cur_idx != ctx->optimum_end_idx) {
1702 /* Case 2: Return the next match/literal already found. */
1703 match.len = ctx->optimum[ctx->optimum_cur_idx].next.link -
1704 ctx->optimum_cur_idx;
1705 match.offset = ctx->optimum[ctx->optimum_cur_idx].next.match_offset;
1707 ctx->optimum_cur_idx = ctx->optimum[ctx->optimum_cur_idx].next.link;
1711 /* Case 1: Compute a new list of matches/literals to return. */
1713 ctx->optimum_cur_idx = 0;
1714 ctx->optimum_end_idx = 0;
1716 /* Get matches at this position. */
1717 num_possible_matches = lzx_lz_get_matches_caching(ctx, &possible_matches);
1719 /* If no matches found, return literal. */
1720 if (num_possible_matches == 0)
1721 return (struct raw_match){ .len = 0 };
1723 /* The matches that were found are sorted by length. Get the length of
1724 * the longest one. */
1725 longest_match_len = possible_matches[num_possible_matches - 1].len;
1727 /* Greedy heuristic: if the longest match that was found is greater
1728 * than the number of fast bytes, return it immediately; don't bother
1729 * doing more work. */
1730 if (longest_match_len > ctx->params.slow.num_fast_bytes) {
1731 lzx_lz_skip_bytes(ctx, longest_match_len - 1);
1732 return possible_matches[num_possible_matches - 1];
1735 /* Calculate the cost to reach the next position by outputting a
1737 #if LZX_PARAM_ACCOUNT_FOR_LRU
1738 ctx->optimum[0].queue = ctx->queue;
1739 ctx->optimum[1].queue = ctx->optimum[0].queue;
1741 ctx->optimum[1].cost = lzx_literal_cost(ctx->window[ctx->match_window_pos],
1743 ctx->optimum[1].prev.link = 0;
1745 /* Calculate the cost to reach any position up to and including that
1746 * reached by the longest match, using the shortest (i.e. closest) match
1747 * that reaches each position. */
1749 BUILD_BUG_ON(LZX_MIN_MATCH != 2);
1750 for (len = LZX_MIN_MATCH; len <= longest_match_len; len++) {
1752 LZX_ASSERT(match_idx < num_possible_matches);
1754 #if LZX_PARAM_ACCOUNT_FOR_LRU
1755 ctx->optimum[len].queue = ctx->optimum[0].queue;
1757 ctx->optimum[len].prev.link = 0;
1758 ctx->optimum[len].prev.match_offset = possible_matches[match_idx].offset;
1759 ctx->optimum[len].cost = lzx_match_cost(len,
1760 possible_matches[match_idx].offset,
1762 #if LZX_PARAM_ACCOUNT_FOR_LRU
1763 , &ctx->optimum[len].queue
1766 if (len == possible_matches[match_idx].len)
/* Forward dynamic-programming loop: extend the cost frontier one
 * position at a time until the cheapest path is forced. */
1770 unsigned cur_pos = 0;
1772 /* len_end: greatest index forward at which costs have been calculated
1774 unsigned len_end = longest_match_len;
1778 /* Advance to next position. */
1781 if (cur_pos == len_end || cur_pos == LZX_PARAM_OPTIM_ARRAY_SIZE)
1782 return lzx_lz_reverse_near_optimal_match_list(ctx, cur_pos);
1784 /* retrieve the number of matches available at this position */
1785 num_possible_matches = lzx_lz_get_matches_caching(ctx,
1788 unsigned new_len = 0;
1790 if (num_possible_matches != 0) {
1791 new_len = possible_matches[num_possible_matches - 1].len;
1793 /* Greedy heuristic: if we found a match greater than
1794 * the number of fast bytes, stop immediately. */
1795 if (new_len > ctx->params.slow.num_fast_bytes) {
1797 /* Build the list of matches to return and get
1799 match = lzx_lz_reverse_near_optimal_match_list(ctx, cur_pos);
1801 /* Append the long match to the end of the list. */
1802 ctx->optimum[cur_pos].next.match_offset =
1803 possible_matches[num_possible_matches - 1].offset;
1804 ctx->optimum[cur_pos].next.link = cur_pos + new_len;
1805 ctx->optimum_end_idx = cur_pos + new_len;
1807 /* Skip over the remaining bytes of the long match. */
1808 lzx_lz_skip_bytes(ctx, new_len - 1);
1810 /* Return first match in the list */
1815 /* Consider proceeding with a literal byte. */
1816 u32 cur_cost = ctx->optimum[cur_pos].cost;
1817 u32 cur_plus_literal_cost = cur_cost +
1818 lzx_literal_cost(ctx->window[ctx->match_window_pos - 1],
1820 if (cur_plus_literal_cost < ctx->optimum[cur_pos + 1].cost) {
1821 ctx->optimum[cur_pos + 1].cost = cur_plus_literal_cost;
1822 ctx->optimum[cur_pos + 1].prev.link = cur_pos;
1823 #if LZX_PARAM_ACCOUNT_FOR_LRU
1824 ctx->optimum[cur_pos + 1].queue = ctx->optimum[cur_pos].queue;
1828 if (num_possible_matches == 0)
1831 /* Consider proceeding with a match. */
/* Newly reachable positions start at "infinite" cost. */
1833 while (len_end < cur_pos + new_len)
1834 ctx->optimum[++len_end].cost = ~(u32)0;
1837 for (len = LZX_MIN_MATCH; len <= new_len; len++) {
1838 LZX_ASSERT(match_idx < num_possible_matches);
1839 #if LZX_PARAM_ACCOUNT_FOR_LRU
/* Copy the queue: lzx_match_cost() mutates it, and each
 * candidate path needs its own LRU state. */
1840 struct lzx_lru_queue q = ctx->optimum[cur_pos].queue;
1842 u32 cost = cur_cost + lzx_match_cost(len,
1843 possible_matches[match_idx].offset,
1845 #if LZX_PARAM_ACCOUNT_FOR_LRU
1850 if (cost < ctx->optimum[cur_pos + len].cost) {
1851 ctx->optimum[cur_pos + len].cost = cost;
1852 ctx->optimum[cur_pos + len].prev.link = cur_pos;
1853 ctx->optimum[cur_pos + len].prev.match_offset =
1854 possible_matches[match_idx].offset;
1855 #if LZX_PARAM_ACCOUNT_FOR_LRU
1856 ctx->optimum[cur_pos + len].queue = q;
1860 if (len == possible_matches[match_idx].len)
/* Return the total number of bits needed to output all symbols of one Huffman
 * code: sum over symbols of (codeword length * symbol frequency). */
1868 lzx_huffman_code_output_cost(const u8 lens[restrict],
1869 const freq_t freqs[restrict],
1874 for (unsigned i = 0; i < num_syms; i++)
1875 cost += (unsigned)lens[i] * (unsigned)freqs[i];
1880 /* Return the number of bits required to output the lengths for the specified
1881 * Huffman code in compressed format (encoded with a precode). */
1883 lzx_code_cost(const u8 lens[], const u8 prev_lens[], unsigned num_syms)
1885 u8 output_syms[num_syms];
1886 freq_t precode_freqs[LZX_PRETREE_NUM_SYMBOLS];
1887 u8 precode_lens[LZX_PRETREE_NUM_SYMBOLS];
1888 u16 precode_codewords[LZX_PRETREE_NUM_SYMBOLS];
1890 unsigned num_additional_bits;
1892 /* Account for the lengths of the precode itself. */
1893 cost += LZX_PRETREE_NUM_SYMBOLS * LZX_PRETREE_ELEMENT_SIZE;
/* Simulate the precode construction (deltas against @prev_lens) without
 * writing anything, to learn the symbol frequencies and extra bits. */
1895 lzx_build_precode(lens, prev_lens, num_syms,
1896 precode_freqs, output_syms,
1897 precode_lens, precode_codewords,
1898 &num_additional_bits);
1900 /* Account for all precode symbols output. */
1901 cost += lzx_huffman_code_output_cost(precode_lens, precode_freqs,
1902 LZX_PRETREE_NUM_SYMBOLS);
1904 /* Account for additional bits. */
1905 cost += num_additional_bits;
1910 /* Account for extra bits in the main symbols. */
1912 lzx_update_mainsym_match_costs(int block_type,
1913 u8 main_lens[LZX_MAINTREE_NUM_SYMBOLS])
1917 LZX_ASSERT(block_type == LZX_BLOCKTYPE_ALIGNED ||
1918 block_type == LZX_BLOCKTYPE_VERBATIM);
/* Only match symbols (those past the literals) carry extra bits. */
1920 for (i = LZX_NUM_CHARS; i < LZX_MAINTREE_NUM_SYMBOLS; i++) {
/* Recover the position slot from the match symbol layout
 * ((position_slot << 3) | len_header). */
1921 unsigned position_slot = (i >> 3) & 0x1f;
1923 /* If it's a verbatim block, add the number of extra bits
1924 * corresponding to the position slot.
1926 * If it's an aligned block and there would normally be at least
1927 * 3 extra bits, count 3 less because they will be output as an
1928 * aligned offset symbol instead. */
1929 unsigned num_extra_bits = lzx_get_num_extra_bits(position_slot);
1931 if (block_type == LZX_BLOCKTYPE_ALIGNED && num_extra_bits >= 3)
1932 num_extra_bits -= 3;
1933 main_lens[i] += num_extra_bits;
1938 * Compute the costs, in bits, to output a compressed block as aligned offset
1942 * Number of bytes of uncompressed data the block represents.
1944 * Huffman codes that will be used when outputting the block.
1946 * Huffman codes for the previous block, or all zeroes if this is the first
1949 * Frequencies of Huffman symbols that will be output in the block.
1951 * Cost of aligned block will be returned here.
1952 * @verbatim_cost_ret
1953 * Cost of verbatim block will be returned here.
1956 lzx_compute_compressed_block_costs(unsigned block_size,
1957 const struct lzx_codes *codes,
1958 const struct lzx_codes *prev_codes,
1959 const struct lzx_freqs *freqs,
1960 unsigned * aligned_cost_ret,
1961 unsigned * verbatim_cost_ret)
/* Costs are accumulated in three buckets: common to both block types,
 * aligned-only, and verbatim-only. */
1963 unsigned common_cost = 0;
1964 unsigned aligned_cost = 0;
1965 unsigned verbatim_cost = 0;
1967 u8 updated_main_lens[LZX_MAINTREE_NUM_SYMBOLS];
1969 /* Account for cost of block header. */
1970 common_cost += LZX_BLOCKTYPE_NBITS;
1971 if (block_size == LZX_DEFAULT_BLOCK_SIZE)
1974 common_cost += LZX_BLOCKSIZE_NBITS;
1976 /* Account for cost of outputting aligned offset code. */
1977 aligned_cost += LZX_ALIGNEDTREE_NUM_SYMBOLS * LZX_ALIGNEDTREE_ELEMENT_SIZE;
1979 /* Account for cost of outputting main and length codes. */
/* The main code's lengths are transmitted in two runs (literals, then
 * match symbols), each delta-encoded against the previous block. */
1980 common_cost += lzx_code_cost(codes->lens.main,
1981 prev_codes->lens.main,
1983 common_cost += lzx_code_cost(codes->lens.main + LZX_NUM_CHARS,
1984 prev_codes->lens.main + LZX_NUM_CHARS,
1985 LZX_MAINTREE_NUM_SYMBOLS - LZX_NUM_CHARS);
1986 common_cost += lzx_code_cost(codes->lens.len,
1987 prev_codes->lens.len,
1988 LZX_LENTREE_NUM_SYMBOLS);
1990 /* Account for cost to output main, length, and aligned symbols, taking
1991 * into account extra position bits. */
1993 memcpy(updated_main_lens, codes->lens.main, LZX_MAINTREE_NUM_SYMBOLS);
1994 lzx_update_mainsym_match_costs(LZX_BLOCKTYPE_VERBATIM, updated_main_lens);
1995 verbatim_cost += lzx_huffman_code_output_cost(updated_main_lens,
1997 LZX_MAINTREE_NUM_SYMBOLS);
1998 memcpy(updated_main_lens, codes->lens.main, LZX_MAINTREE_NUM_SYMBOLS);
1999 lzx_update_mainsym_match_costs(LZX_BLOCKTYPE_ALIGNED, updated_main_lens);
2000 aligned_cost += lzx_huffman_code_output_cost(updated_main_lens,
2002 LZX_MAINTREE_NUM_SYMBOLS);
2004 common_cost += lzx_huffman_code_output_cost(codes->lens.len,
2006 LZX_LENTREE_NUM_SYMBOLS);
2008 aligned_cost += lzx_huffman_code_output_cost(codes->lens.aligned,
2010 LZX_ALIGNEDTREE_NUM_SYMBOLS);
2012 *aligned_cost_ret = aligned_cost + common_cost;
2013 *verbatim_cost_ret = verbatim_cost + common_cost;
2016 /* Prepare a (nonsplit) compressed block. */
2018 lzx_prepare_compressed_block(struct lzx_compressor *ctx, unsigned block_number,
2019 struct lzx_codes *prev_codes)
2021 struct lzx_block_spec *spec = &ctx->block_specs[block_number - 1];
/* Saved so each optimization pass can re-parse the same region with the
 * same starting state. */
2022 unsigned orig_cached_matches_pos = ctx->cached_matches_pos;
2023 struct lzx_lru_queue orig_queue = ctx->queue;
2024 struct lzx_freqs freqs;
2027 /* Here's where the real work happens. The following loop runs one or
2028 * more times, each time using a cost model based on the Huffman codes
2029 * computed from the previous iteration (the first iteration uses a
2030 * default model). Each iteration of the loop uses a heuristic
2031 * algorithm to divide the block into near-optimal matches/literals from
2032 * beginning to end. */
2033 LZX_ASSERT(ctx->params.slow.num_optim_passes >= 1);
2034 spec->num_chosen_matches = 0;
2035 for (unsigned pass = 0; pass < ctx->params.slow.num_optim_passes; pass++)
2037 LZX_DEBUG("Block %u: Match-choosing pass %u of %u",
2038 block_number, pass + 1,
2039 ctx->params.slow.num_optim_passes);
2041 /* Reset frequency tables. */
2042 memset(&freqs, 0, sizeof(freqs));
2044 /* Reset match offset LRU queue. */
2045 ctx->queue = orig_queue;
2047 /* Reset match-finding position. */
2048 ctx->cached_matches_pos = orig_cached_matches_pos;
2049 ctx->match_window_pos = spec->window_pos;
2050 ctx->match_window_end = spec->window_pos + spec->block_size;
2052 /* Set cost model. */
2053 lzx_set_costs(ctx, &spec->codes.lens);
2055 unsigned window_pos = spec->window_pos;
2056 unsigned end = window_pos + spec->block_size;
2058 while (window_pos < end) {
2059 struct raw_match match;
2060 struct lzx_match lzx_match;
2062 match = lzx_lz_get_near_optimal_match(ctx);
2064 if (match.len >= LZX_MIN_MATCH) {
2066 /* Best to output a match here. */
2068 LZX_ASSERT(match.len <= LZX_MAX_MATCH);
2069 LZX_ASSERT(!memcmp(&ctx->window[window_pos],
2070 &ctx->window[window_pos - match.offset],
2073 /* Tally symbol frequencies. */
2074 lzx_match.data = lzx_record_match(match.offset,
2079 window_pos += match.len;
2081 /* Best to output a literal here. */
2083 /* Tally symbol frequencies. */
2084 lzx_match.data = lzx_record_literal(ctx->window[window_pos],
2090 /* If it's the last pass, save the match/literal in
2091 * intermediate form. */
2092 if (pass == ctx->params.slow.num_optim_passes - 1) {
2093 ctx->chosen_matches[spec->chosen_matches_start_pos +
2094 spec->num_chosen_matches] = lzx_match;
2096 spec->num_chosen_matches++;
2099 LZX_ASSERT(window_pos == end);
2101 /* Build Huffman codes using the new frequencies. */
2102 lzx_make_huffman_codes(&freqs, &spec->codes);
2104 /* The first time we get here is when the full input has been
2105 * processed, so the match-finding is done. */
2106 ctx->matches_already_found = true;
2109 LZX_DEBUG("Block %u: saved %u matches/literals @ %u",
2110 block_number, spec->num_chosen_matches,
2111 spec->chosen_matches_start_pos);
2113 unsigned aligned_cost;
2114 unsigned verbatim_cost;
2116 lzx_compute_compressed_block_costs(spec->block_size,
2123 /* Choose whether to make the block aligned offset or verbatim. */
2124 if (aligned_cost < verbatim_cost) {
2125 spec->block_type = LZX_BLOCKTYPE_ALIGNED;
2126 cost = aligned_cost;
2127 LZX_DEBUG("Using aligned block (cost %u vs %u for verbatim)",
2128 aligned_cost, verbatim_cost);
2130 spec->block_type = LZX_BLOCKTYPE_VERBATIM;
2131 cost = verbatim_cost;
2132 LZX_DEBUG("Using verbatim block (cost %u vs %u for aligned)",
2133 verbatim_cost, aligned_cost);
2136 LZX_DEBUG("Block %u is %u => %u bytes unsplit.",
2137 block_number, spec->block_size, cost / 8);
2143 * lzx_prepare_block_recursive() -
2145 * Given a (possibly nonproper) sub-sequence of the preprocessed input, compute
2146 * the LZX block(s) that it should be output as.
2148 * This function initially considers the case where the given sub-sequence of
2149 * the preprocessed input be output as a single block. This block is calculated
2150 * and its cost (number of bits required to output it) is computed.
2152 * Then, if @max_split_level is greater than zero, a split into two evenly sized
2153 * subblocks is considered. The block is recursively split in this way,
2154 * potentially up to the depth specified by @max_split_level. The cost of the
2155 * split block is compared to the cost of the single block, and the lower cost
2158 * For each compressed output block computed, the sequence of matches/literals
2159 * and the corresponding Huffman codes for the block are produced and saved.
2161 * The return value is the approximate number of bits the block (or all
2162 * subblocks, in the case that the split block had lower cost), will take up
2163 * when written to the compressed output.
2166 lzx_prepare_block_recursive(struct lzx_compressor * ctx,
2167 unsigned block_number,
2168 unsigned max_split_level,
2169 struct lzx_codes **prev_codes_p)
/* Block numbers form an implicit binary tree: block N's children are
 * 2N and 2N + 1 (see the index arithmetic below). */
2171 struct lzx_block_spec *spec = &ctx->block_specs[block_number - 1];
2173 unsigned orig_cached_matches_pos;
2174 struct lzx_lru_queue orig_queue, nonsplit_queue;
2175 struct lzx_codes *prev_codes = *prev_codes_p;
2177 LZX_DEBUG("Preparing block %u...", block_number);
2179 /* Save positions of chosen and cached matches, and the match offset LRU
2180 * queue, so that they can be restored if splitting is attempted. */
2181 orig_cached_matches_pos = ctx->cached_matches_pos;
2182 orig_queue = ctx->queue;
2184 /* Consider outputting the input subsequence as a single block. */
2186 cost = lzx_prepare_compressed_block(ctx, block_number, prev_codes);
/* Remember the queue state of the nonsplit solution so it can be
 * restored if the split turns out to be worse. */
2187 nonsplit_queue = ctx->queue;
2189 *prev_codes_p = &spec->codes;
2191 /* If the maximum split level is at least one, consider splitting the
2193 if (max_split_level--) {
2195 LZX_DEBUG("Calculating split of block %u...", block_number);
2197 struct lzx_block_spec *spec1, *spec2;
2198 unsigned split_cost;
/* Rewind match-finding state before re-processing as two halves. */
2200 ctx->cached_matches_pos = orig_cached_matches_pos;
2201 ctx->queue = orig_queue;
2203 /* Prepare and get the cost of the first sub-block. */
2204 spec1 = &ctx->block_specs[block_number * 2 - 1];
2205 spec1->codes.lens = spec->codes.lens;
2206 spec1->window_pos = spec->window_pos;
2207 spec1->block_size = spec->block_size / 2;
/* Each tree level stores its chosen matches in a separate
 * LZX_MAX_WINDOW_SIZE-sized region of ctx->chosen_matches. */
2208 spec1->chosen_matches_start_pos = spec->chosen_matches_start_pos +
2209 LZX_MAX_WINDOW_SIZE;
2210 split_cost = lzx_prepare_block_recursive(ctx,
2215 /* Prepare and get the cost of the second sub-block. */
2217 spec2->codes.lens = spec->codes.lens;
2218 spec2->window_pos = spec->window_pos + spec1->block_size;
/* Second half gets the remainder, so odd sizes are handled. */
2219 spec2->block_size = spec->block_size - spec1->block_size;
2220 spec2->chosen_matches_start_pos = spec1->chosen_matches_start_pos +
2222 split_cost += lzx_prepare_block_recursive(ctx,
2223 block_number * 2 + 1,
2227 /* Compare the cost of the whole block with that of the split
2228 * block. Choose the lower cost solution. */
2229 if (split_cost < cost) {
2230 LZX_DEBUG("Splitting block %u is worth it "
2231 "(%u => %u bytes).",
2232 block_number, cost / 8, split_cost / 8);
2235 *prev_codes_p = prev_codes;
2237 LZX_DEBUG("Splitting block %u is NOT worth it "
2238 "(%u => %u bytes).",
2239 block_number, cost / 8, split_cost / 8);
/* Keep the nonsplit solution's LRU queue state. */
2240 ctx->queue = nonsplit_queue;
2247 /* Empirical averages */
/* Default cost (presumably Huffman code length in bits — confirm against
 * lzx_set_costs) for each main-alphabet symbol, used as the initial cost
 * model before any symbol statistics have been gathered. */
2248 static const u8 lzx_default_mainsym_costs[LZX_MAINTREE_NUM_SYMBOLS] = {
2249 7, 9, 9, 10, 9, 10, 10, 10, 9, 10, 9, 10, 10, 9, 10, 10, 9, 10, 10, 11,
2250 10, 10, 10, 11, 10, 11, 11, 11, 10, 11, 11, 11, 8, 11, 9, 10, 9, 10, 11,
2251 11, 9, 9, 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 9, 9, 9, 8, 8, 9, 9, 9, 9,
2252 10, 10, 10, 8, 9, 8, 8, 8, 8, 9, 9, 9, 10, 10, 8, 8, 9, 9, 8, 10, 9, 8,
2253 8, 9, 8, 9, 9, 10, 10, 10, 9, 10, 11, 9, 10, 8, 9, 8, 8, 8, 8, 9, 8, 8,
2254 9, 9, 8, 8, 8, 8, 8, 10, 8, 8, 7, 8, 9, 9, 9, 9, 10, 11, 10, 10, 11, 11,
2255 10, 11, 11, 10, 10, 11, 11, 11, 10, 10, 11, 10, 11, 10, 11, 11, 10, 11,
2256 11, 12, 11, 11, 11, 12, 11, 11, 11, 11, 11, 11, 11, 12, 10, 11, 11, 11,
2257 11, 11, 11, 12, 11, 11, 11, 11, 11, 12, 11, 11, 10, 11, 11, 11, 11, 11,
2258 11, 11, 10, 11, 11, 11, 11, 11, 11, 11, 10, 11, 11, 11, 11, 11, 11, 11,
2259 10, 11, 11, 11, 11, 11, 11, 11, 10, 11, 11, 11, 11, 12, 11, 11, 10, 11,
2260 11, 11, 11, 12, 11, 11, 10, 11, 11, 11, 10, 12, 11, 11, 10, 10, 11, 10,
2261 10, 11, 11, 11, 10, 11, 11, 11, 10, 11, 11, 11, 10, 11, 11, 11, 10, 11,
2262 10, 9, 8, 7, 10, 10, 11, 10, 11, 7, 9, 9, 11, 11, 11, 12, 11, 9, 10, 10,
2263 12, 12, 13, 13, 12, 11, 10, 12, 12, 14, 14, 14, 13, 12, 9, 12, 13, 14,
2264 14, 14, 14, 14, 9, 10, 13, 14, 14, 14, 14, 14, 9, 9, 11, 11, 13, 13, 13,
2265 14, 9, 9, 11, 12, 12, 13, 13, 13, 8, 8, 11, 11, 12, 12, 12, 11, 9, 9,
2266 10, 11, 12, 12, 12, 11, 8, 9, 10, 10, 11, 12, 11, 10, 9, 9, 10, 11, 11,
2267 12, 11, 10, 8, 9, 10, 10, 11, 11, 11, 9, 9, 9, 10, 11, 11, 11, 11, 9, 8,
2268 8, 10, 10, 11, 11, 11, 9, 9, 9, 10, 10, 11, 11, 11, 9, 9, 8, 9, 10, 11,
2269 11, 11, 9, 10, 9, 10, 11, 11, 11, 11, 9, 14, 9, 9, 10, 10, 11, 10, 9,
2270 14, 9, 10, 11, 11, 11, 11, 9, 14, 9, 10, 10, 11, 11, 11, 9, 14, 10, 10,
2271 11, 11, 12, 11, 10, 14, 10, 10, 10, 11, 11, 11, 10, 14, 11, 11, 11, 11,
2272 12, 12, 10, 14, 10, 11, 11, 11, 12, 11, 10, 14, 11, 11, 11, 12, 12, 12,
2273 11, 15, 11, 11, 11, 12, 12, 12, 11, 14, 12, 12, 12, 12, 13, 12, 11, 15,
2274 12, 12, 12, 13, 13, 13, 12, 15, 14, 13, 14, 14, 14, 14, 13,
2277 /* Empirical averages */
/* Default cost for each length-alphabet symbol; companion table to
 * lzx_default_mainsym_costs above, used as the initial cost model. */
2278 static const u8 lzx_default_lensym_costs[LZX_LENTREE_NUM_SYMBOLS] = {
2279 5, 5, 5, 5, 5, 6, 5, 5, 6, 7, 7, 7, 8, 8, 7, 8, 9, 9, 9, 9, 10, 9, 9,
2280 10, 9, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12, 11, 12, 12,
2281 12, 12, 12, 12, 13, 12, 12, 12, 13, 12, 13, 13, 12, 12, 13, 12, 13, 13,
2282 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 13, 14, 13, 14, 13,
2283 14, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2284 14, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2285 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2286 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2287 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2288 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2289 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2290 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2291 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
2292 14, 14, 14, 14, 14, 14, 14, 14, 14, 10,
2296 * Set default symbol costs.
/* Initializes @lens with the default cost model used for the first
 * match-choosing pass: either the empirical tables above (when
 * LZX_PARAM_USE_EMPIRICAL_DEFAULT_COSTS is set) or flat per-alphabet
 * costs (the else branch; its assignments are partly outside this view). */
2299 lzx_set_default_costs(struct lzx_lens * lens)
2303 #if LZX_PARAM_USE_EMPIRICAL_DEFAULT_COSTS
2304 memcpy(&lens->main, lzx_default_mainsym_costs, LZX_MAINTREE_NUM_SYMBOLS);
2305 memcpy(&lens->len, lzx_default_lensym_costs, LZX_LENTREE_NUM_SYMBOLS);
2308 /* Literal symbols */
2309 for (i = 0; i < LZX_NUM_CHARS; i++)
2312 /* Match header symbols */
2313 for (; i < LZX_MAINTREE_NUM_SYMBOLS; i++)
2316 /* Length symbols */
2317 for (i = 0; i < LZX_LENTREE_NUM_SYMBOLS; i++)
2321 /* Aligned offset symbols */
2322 for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++)
2323 lens->aligned[i] = 3;
2327 * lzx_prepare_blocks() -
2329 * Calculate the blocks to split the preprocessed data into.
2331 * Input --- the preprocessed data:
2340 * ctx->cached_matches
2341 * ctx->cached_matches_pos
2342 * ctx->matches_already_found
2344 * Block cost modeling:
2346 * ctx->block_specs (also an output)
2350 * ctx->optimum_cur_idx
2351 * ctx->optimum_end_idx
2352 * ctx->chosen_matches (also an output)
2354 * Output --- the block specifications and the corresponding match/literal data:
2356 * ctx->block_specs[]
2357 * ctx->chosen_matches[]
2359 * The return value is the approximate number of bits the compressed data will
2363 lzx_prepare_blocks(struct lzx_compressor * ctx)
2365 /* This function merely does some initializations, then passes control
2366 * to lzx_prepare_block_recursive(). */
2368 /* 1. Initialize match-finding variables. */
2370 /* Zero all entries in the hash table, indicating that no length-3
2371 * character sequences have been discovered in the input yet. */
2372 memset(ctx->hash_tab, 0, LZX_LZ_HASH_SIZE * 2 * sizeof(ctx->hash_tab[0]));
/* The digram (length-2 match) table is only allocated and used when
 * the corresponding parameter is enabled. */
2373 if (ctx->params.slow.use_len2_matches)
2374 memset(ctx->digram_tab, 0, 256 * 256 * sizeof(ctx->digram_tab[0]));
2375 /* Note: ctx->child_tab need not be initialized. */
2377 /* No matches have been found and cached yet. */
2378 ctx->cached_matches_pos = 0;
2379 ctx->matches_already_found = false;
2381 /* 2. Initialize match-choosing variables. */
2382 ctx->optimum_cur_idx = 0;
2383 ctx->optimum_end_idx = 0;
2384 /* Note: ctx->optimum need not be initialized. */
2385 ctx->block_specs[0].chosen_matches_start_pos = 0;
2387 /* 3. Set block 1 (index 0) to represent the entire input data. */
2388 ctx->block_specs[0].block_size = ctx->window_size;
2389 ctx->block_specs[0].window_pos = 0;
2391 /* 4. Set up a default Huffman symbol cost model for block 1 (index 0).
2392 * The model will be refined later. */
2393 lzx_set_default_costs(&ctx->block_specs[0].codes.lens);
2395 /* 5. Initialize the match offset LRU queue. */
/* All three recent-offset slots start at offset 1. */
2396 ctx->queue = (struct lzx_lru_queue){1, 1, 1};
2398 /* 6. Pass control to recursive procedure. */
/* zero_codes serves as the "previous codes" baseline for block 1. */
2399 struct lzx_codes * prev_codes = &ctx->zero_codes;
2400 return lzx_prepare_block_recursive(ctx, 1,
2401 ctx->params.slow.num_split_passes,
2406 * This is the fast version of lzx_prepare_blocks(). This version "quickly"
2407 * prepares a single compressed block containing the entire input. See the
2408 * description of the "Fast algorithm" at the beginning of this file for more
2411 * Input --- the preprocessed data:
2419 * Output --- the block specifications and the corresponding match/literal data:
2421 * ctx->block_specs[]
2422 * ctx->chosen_matches[]
2425 lzx_prepare_block_fast(struct lzx_compressor * ctx)
2427 unsigned num_matches;
2428 struct lzx_freqs freqs;
2429 struct lzx_block_spec *spec;
2431 /* Parameters to hash chain LZ match finder */
2432 static const struct lz_params lzx_lz_params = {
2433 /* LZX_MIN_MATCH == 2, but 2-character matches are rarely
2434 * useful; the minimum match for compression is set to 3
2437 .max_match = LZX_MAX_MATCH,
2438 .good_match = LZX_MAX_MATCH,
2439 .nice_match = LZX_MAX_MATCH,
2440 .max_chain_len = LZX_MAX_MATCH,
2441 .max_lazy_match = LZX_MAX_MATCH,
2445 /* Initialize symbol frequencies and match offset LRU queue. */
2446 memset(&freqs, 0, sizeof(struct lzx_freqs));
2447 ctx->queue = (struct lzx_lru_queue){ 1, 1, 1 };
2449 /* Determine series of matches/literals to output. */
/* chosen_matches doubles as the u32 output buffer for the generic
 * match finder here; some call arguments are outside this view. */
2450 num_matches = lz_analyze_block(ctx->window,
2452 (u32*)ctx->chosen_matches,
2461 /* Set up block specification. */
/* The fast path always emits exactly one aligned-offset block
 * covering the whole window. */
2462 spec = &ctx->block_specs[0];
2464 spec->block_type = LZX_BLOCKTYPE_ALIGNED;
2465 spec->window_pos = 0;
2466 spec->block_size = ctx->window_size;
2467 spec->num_chosen_matches = num_matches;
2468 spec->chosen_matches_start_pos = 0;
2469 lzx_make_huffman_codes(&freqs, &spec->codes);
/* Translate a single in-range x86 CALL rel32 operand from a relative to an
 * "absolute" offset, in place. @call_insn_target points at the little-endian
 * 32-bit operand; @input_pos is the position of the instruction in the data.
 * Out-of-range offsets are left untouched (the declarations and part of the
 * control flow fall outside this view). */
2473 do_call_insn_translation(u32 *call_insn_target, int input_pos,
2479 rel_offset = le32_to_cpu(*call_insn_target);
2480 if (rel_offset >= -input_pos && rel_offset < file_size) {
2481 if (rel_offset < file_size - input_pos) {
2482 /* "good translation" */
2483 abs_offset = rel_offset + input_pos;
2485 /* "compensating translation" */
2486 abs_offset = rel_offset - file_size;
2488 *call_insn_target = cpu_to_le32(abs_offset);
2492 /* This is the reverse of undo_call_insn_preprocessing() in lzx-decompress.c.
2493 * See the comment above that function for more information. */
2495 do_call_insn_preprocessing(u8 data[], int size)
/* Scan for 0xe8 (x86 CALL) opcodes and translate each 4-byte operand.
 * The "size - 10" bound keeps the operand read in bounds near the end of
 * the buffer — presumably matching the decompressor's limit; confirm
 * against undo_call_insn_preprocessing(). */
2497 for (int i = 0; i < size - 10; i++) {
2498 if (data[i] == 0xe8) {
2499 do_call_insn_translation((u32*)&data[i + 1], i,
2500 LZX_WIM_MAGIC_FILESIZE);
2506 /* API function documented in wimlib.h */
/* Compresses one chunk of up to 32768 bytes using the context's configured
 * algorithm. Returns the compressed length; presumably returns 0 when the
 * data is too small or does not compress smaller than the original (the
 * return statements on those paths are outside this view — confirm against
 * wimlib.h). */
2508 wimlib_lzx_compress2(const void * const restrict uncompressed_data,
2509 unsigned const uncompressed_len,
2510 void * const restrict compressed_data,
2511 struct wimlib_lzx_context * const restrict lzx_ctx)
2513 struct lzx_compressor *ctx = (struct lzx_compressor*)lzx_ctx;
2514 struct output_bitstream ostream;
2515 unsigned compressed_len;
2517 if (uncompressed_len < 100) {
2518 LZX_DEBUG("Too small to bother compressing.");
2522 if (uncompressed_len > 32768) {
2523 LZX_DEBUG("Only up to 32768 bytes of uncompressed data are supported.");
2527 wimlib_assert(lzx_ctx != NULL);
2529 LZX_DEBUG("Attempting to compress %u bytes...", uncompressed_len);
2531 /* The input data must be preprocessed. To avoid changing the original
2532 * input, copy it to a temporary buffer. */
2533 memcpy(ctx->window, uncompressed_data, uncompressed_len);
2534 ctx->window_size = uncompressed_len;
2536 /* This line is unnecessary; it just avoids inconsequential accesses of
2537 * uninitialized memory that would show up in memory-checking tools such
2539 memset(&ctx->window[ctx->window_size], 0, 12);
2541 LZX_DEBUG("Preprocessing data...");
2543 /* Before doing any actual compression, do the call instruction (0xe8
2544 * byte) translation on the uncompressed data. */
2545 do_call_insn_preprocessing(ctx->window, ctx->window_size);
2547 LZX_DEBUG("Preparing blocks...");
2549 /* Prepare the compressed data. */
2550 if (ctx->params.algorithm == WIMLIB_LZX_ALGORITHM_FAST)
2551 lzx_prepare_block_fast(ctx);
2553 lzx_prepare_blocks(ctx);
2555 LZX_DEBUG("Writing compressed blocks...");
2557 /* Generate the compressed data. */
/* Output capacity is window_size - 1: compression only succeeds if the
 * result is strictly smaller than the input. */
2558 init_output_bitstream(&ostream, compressed_data, ctx->window_size - 1);
2559 lzx_write_all_blocks(ctx, &ostream);
2561 LZX_DEBUG("Flushing bitstream...");
2562 if (flush_output_bitstream(&ostream)) {
2563 /* If the bitstream cannot be flushed, then the output space was
2565 LZX_DEBUG("Data did not compress to less than original length!");
2569 /* Compute the length of the compressed data. */
2570 compressed_len = ostream.bit_output - (u8*)compressed_data;
2572 LZX_DEBUG("Done: compressed %u => %u bytes.",
2573 uncompressed_len, compressed_len);
2575 #if defined(ENABLE_LZX_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION)
2576 /* Verify that we really get the same thing back when decompressing. */
2578 u8 buf[uncompressed_len];
2582 ret = wimlib_lzx_decompress(compressed_data, compressed_len,
2583 buf, uncompressed_len);
2585 ERROR("Failed to decompress data we "
2586 "compressed using LZX algorithm");
2592 const u8 * udata = uncompressed_data;
2593 for (i = 0; i < uncompressed_len; i++) {
2594 if (buf[i] != udata[i]) {
2596 ERROR("Data we compressed using LZX algorithm "
2597 "didn't decompress to original "
2598 "(difference at idx %u: c %#02x, u %#02x)",
2599 i, buf[i], udata[i]);
2608 return compressed_len;
/* Returns nonzero iff @oldparams and @newparams are bytewise identical,
 * i.e. an existing context built with @oldparams can be reused as-is. */
2612 lzx_params_compatible(const struct wimlib_lzx_params *oldparams,
2613 const struct wimlib_lzx_params *newparams)
2615 return 0 == memcmp(oldparams, newparams, sizeof(struct wimlib_lzx_params));
2618 /* API function documented in wimlib.h */
/* Validates @params (substituting built-in defaults when NULL or when
 * use_defaults is set), then allocates a compressor context into *@ctx_pp,
 * reusing any existing compatible context. Uses a goto-chain for cleanup on
 * allocation failure. Returns 0 on success or a WIMLIB_ERR_* code (some
 * success-path return lines are outside this view). */
2620 wimlib_lzx_alloc_context(const struct wimlib_lzx_params *params,
2621 struct wimlib_lzx_context **ctx_pp)
2624 LZX_DEBUG("Allocating LZX context...");
2626 struct lzx_compressor *ctx;
2628 static const struct wimlib_lzx_params fast_default = {
2629 .size_of_this = sizeof(struct wimlib_lzx_params),
2630 .algorithm = WIMLIB_LZX_ALGORITHM_FAST,
2635 static const struct wimlib_lzx_params slow_default = {
2636 .size_of_this = sizeof(struct wimlib_lzx_params),
2637 .algorithm = WIMLIB_LZX_ALGORITHM_SLOW,
2640 .use_len2_matches = 1,
2641 .num_fast_bytes = 32,
2642 .num_optim_passes = 3,
2643 .num_split_passes = 3,
2644 .main_nostat_cost = 15,
2645 .len_nostat_cost = 15,
2646 .aligned_nostat_cost = 7,
2650 if (params == NULL) {
2651 LZX_DEBUG("Using default algorithm and parameters.");
2652 params = &slow_default;
2655 if (params->algorithm != WIMLIB_LZX_ALGORITHM_SLOW &&
2656 params->algorithm != WIMLIB_LZX_ALGORITHM_FAST)
2658 LZX_DEBUG("Invalid algorithm.");
2659 return WIMLIB_ERR_INVALID_PARAM;
2662 if (params->use_defaults) {
2663 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW)
2664 params = &slow_default;
2666 params = &fast_default;
/* size_of_this guards against callers built with a different struct
 * layout. */
2669 if (params->size_of_this != sizeof(struct wimlib_lzx_params)) {
2670 LZX_DEBUG("Invalid parameter structure size!");
2671 return WIMLIB_ERR_INVALID_PARAM;
2674 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
2675 if (params->slow.num_fast_bytes < 3 ||
2676 params->slow.num_fast_bytes > 257)
2678 LZX_DEBUG("Invalid number of fast bytes!");
2679 return WIMLIB_ERR_INVALID_PARAM;
2682 if (params->slow.num_optim_passes < 1)
2684 LZX_DEBUG("Invalid number of optimization passes!");
2685 return WIMLIB_ERR_INVALID_PARAM;
2688 if (params->slow.main_nostat_cost < 1 ||
2689 params->slow.main_nostat_cost > 16)
2691 LZX_DEBUG("Invalid main_nostat_cost!");
2692 return WIMLIB_ERR_INVALID_PARAM;
2695 if (params->slow.len_nostat_cost < 1 ||
2696 params->slow.len_nostat_cost > 16)
2698 LZX_DEBUG("Invalid len_nostat_cost!");
2699 return WIMLIB_ERR_INVALID_PARAM;
2702 if (params->slow.aligned_nostat_cost < 1 ||
2703 params->slow.aligned_nostat_cost > 8)
2705 LZX_DEBUG("Invalid aligned_nostat_cost!");
2706 return WIMLIB_ERR_INVALID_PARAM;
/* A NULL ctx_pp means the caller only wants parameter validation. */
2710 if (ctx_pp == NULL) {
2711 LZX_DEBUG("Check parameters only.");
2715 ctx = *(struct lzx_compressor**)ctx_pp;
/* Reuse an existing context when its parameters match exactly. */
2717 if (ctx && lzx_params_compatible(&ctx->params, params))
2720 LZX_DEBUG("Allocating memory.");
2722 ctx = MALLOC(sizeof(struct lzx_compressor));
2726 size_t block_specs_length;
/* The recursive splitter needs one spec per node of a binary tree of
 * depth num_split_passes: 2^(passes+1) - 1 entries. */
2728 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW)
2729 block_specs_length = ((1 << (params->slow.num_split_passes + 1)) - 1);
2731 block_specs_length = 1;
2732 ctx->block_specs = MALLOC(block_specs_length * sizeof(ctx->block_specs[0]));
2733 if (ctx->block_specs == NULL)
2736 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
/* hash_tab and child_tab share one allocation; child_tab starts
 * right after the LZX_LZ_HASH_SIZE hash entries. */
2737 ctx->hash_tab = MALLOC((LZX_LZ_HASH_SIZE + 2 * LZX_MAX_WINDOW_SIZE) *
2738 sizeof(ctx->hash_tab[0]));
2739 if (ctx->hash_tab == NULL)
2740 goto err_free_block_specs;
2741 ctx->child_tab = ctx->hash_tab + LZX_LZ_HASH_SIZE;
2743 ctx->hash_tab = NULL;
2744 ctx->child_tab = NULL;
2747 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW &&
2748 params->slow.use_len2_matches)
2750 ctx->digram_tab = MALLOC(256 * 256 * sizeof(ctx->digram_tab[0]));
2751 if (ctx->digram_tab == NULL)
2752 goto err_free_hash_tab;
2754 ctx->digram_tab = NULL;
2757 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
2758 ctx->cached_matches = MALLOC(10 * LZX_MAX_WINDOW_SIZE *
2759 sizeof(ctx->cached_matches[0]));
2760 if (ctx->cached_matches == NULL)
2761 goto err_free_digram_tab;
2763 ctx->cached_matches = NULL;
2766 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
2767 ctx->optimum = MALLOC((LZX_PARAM_OPTIM_ARRAY_SIZE + LZX_MAX_MATCH) *
2768 sizeof(ctx->optimum[0]));
2769 if (ctx->optimum == NULL)
2770 goto err_free_cached_matches;
2772 ctx->optimum = NULL;
2775 size_t chosen_matches_length;
/* One LZX_MAX_WINDOW_SIZE region per split level (see
 * chosen_matches_start_pos usage in the splitter). */
2776 if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW)
2777 chosen_matches_length = LZX_MAX_WINDOW_SIZE *
2778 (params->slow.num_split_passes + 1);
2780 chosen_matches_length = LZX_MAX_WINDOW_SIZE;
2782 ctx->chosen_matches = MALLOC(chosen_matches_length *
2783 sizeof(ctx->chosen_matches[0]));
2784 if (ctx->chosen_matches == NULL)
2785 goto err_free_optimum;
2787 memcpy(&ctx->params, params, sizeof(struct wimlib_lzx_params));
2788 memset(&ctx->zero_codes, 0, sizeof(ctx->zero_codes));
2790 LZX_DEBUG("Successfully allocated new LZX context.");
/* Replace any old (incompatible) context the caller passed in. */
2792 wimlib_lzx_free_context(*ctx_pp);
2793 *ctx_pp = (struct wimlib_lzx_context*)ctx;
/* Error cleanup chain: each label frees everything allocated before
 * the failure point (FREE on NULL members is assumed safe here). */
2798 err_free_cached_matches:
2799 FREE(ctx->cached_matches);
2800 err_free_digram_tab:
2801 FREE(ctx->digram_tab);
2803 FREE(ctx->hash_tab);
2804 err_free_block_specs:
2805 FREE(ctx->block_specs);
2809 LZX_DEBUG("Ran out of memory.");
2810 return WIMLIB_ERR_NOMEM;
2813 /* API function documented in wimlib.h */
/* Frees a context allocated by wimlib_lzx_alloc_context(), including all of
 * its member buffers. Presumably a no-op for NULL (the guard, if any, is
 * outside this view — confirm against the full file). */
2815 wimlib_lzx_free_context(struct wimlib_lzx_context *_ctx)
2817 struct lzx_compressor *ctx = (struct lzx_compressor*)_ctx;
2820 FREE(ctx->chosen_matches);
2822 FREE(ctx->cached_matches);
2823 FREE(ctx->digram_tab);
2824 FREE(ctx->hash_tab);
2825 FREE(ctx->block_specs);
2830 /* API function documented in wimlib.h */
2832 wimlib_lzx_compress(const void * const restrict uncompressed_data,
2833 unsigned const uncompressed_len,
2834 void * const restrict compressed_data)
2837 struct wimlib_lzx_context *ctx;
2838 unsigned compressed_len;
2840 ret = wimlib_lzx_alloc_context(NULL, &ctx);
2842 wimlib_assert(ret != WIMLIB_ERR_INVALID_PARAM);
2843 WARNING("Couldn't allocate LZX compression context: %"TS"",
2844 wimlib_get_error_string(ret));
2848 compressed_len = wimlib_lzx_compress2(uncompressed_data,
2853 wimlib_lzx_free_context(ctx);
2855 return compressed_len;