4 * LZX compression routines, originally based on code written by Matthew T.
5 * Russotto (liblzxcomp), but heavily modified.
9 * Copyright (C) 2002 Matthew T. Russotto
10 * Copyright (C) 2012 Eric Biggers
12 * This file is part of wimlib, a library for working with WIM files.
14 * wimlib is free software; you can redistribute it and/or modify it under the
15 * terms of the GNU General Public License as published by the Free
16 * Software Foundation; either version 3 of the License, or (at your option)
19 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21 * A PARTICULAR PURPOSE. See the GNU General Public License for more
24 * You should have received a copy of the GNU General Public License
25 * along with wimlib; if not, see http://www.gnu.org/licenses/.
30 * This file provides lzx_compress(), a function to compress an in-memory buffer
31 * of data using LZX compression, as used in the WIM file format.
33 * Please see the comments in lzx-decomp.c for more information about this
36 * One thing to keep in mind is that there is no sliding window, since the
37 * window is always the entirety of a WIM chunk, which is at most WIM_CHUNK_SIZE
40 * The basic compression algorithm used here should be familiar if you are
41 * familiar with Huffman trees and with other LZ77 and Huffman-based formats
42 * such as DEFLATE. Otherwise it can be quite tricky to understand. Basically
43 * it is the following:
45 * - Preprocess the input data (LZX-specific)
46 * - Go through the input data and determine matches. This part is based on
47 * code from zlib, and a hash table of 3-character strings is used to
48 * accelerate the process of finding matches.
49 * - Build the Huffman trees based on the frequencies of symbols determined
50 * while recording matches.
51 * - Output the block header, including the Huffman trees; then output the
52 * compressed stream of matches and literal characters.
54 * It is possible for a WIM chunk to include multiple LZX blocks, since for some
55 * input data this will produce a better compression ratio (especially since
56 * each block can include new Huffman codes). However, producing multiple LZX
57 * blocks from one input chunk is not yet implemented.
67 /* Structure to contain the Huffman codes for the main, length, and aligned
70 u16 main_codewords[LZX_MAINTREE_NUM_SYMBOLS];
71 u8 main_lens[LZX_MAINTREE_NUM_SYMBOLS];
73 u16 len_codewords[LZX_LENTREE_NUM_SYMBOLS];
74 u8 len_lens[LZX_LENTREE_NUM_SYMBOLS];
76 u16 aligned_codewords[LZX_ALIGNEDTREE_NUM_SYMBOLS];
77 u8 aligned_lens[LZX_ALIGNEDTREE_NUM_SYMBOLS];
80 struct lzx_freq_tables {
81 u32 main_freq_table[LZX_MAINTREE_NUM_SYMBOLS];
82 u32 len_freq_table[LZX_LENTREE_NUM_SYMBOLS];
83 u32 aligned_freq_table[LZX_ALIGNEDTREE_NUM_SYMBOLS];
/* Returns the LZX position slot that corresponds to a given formatted offset.
 *
 * Logically, this returns the smallest i such that
 * formatted_offset >= lzx_position_base[i].
 *
 * The actual implementation below takes advantage of the regularity of the
 * numbers in the lzx_position_base array to calculate the slot directly from
 * the formatted offset without actually looking at the array.
 */
static inline unsigned lzx_get_position_slot(unsigned formatted_offset)
{
	/* Slots 36-49 (formatted_offset >= 262144) can be found by
	 * (formatted_offset / 131072) + 34 == (formatted_offset >> 17) + 34.
	 * WIM chunks are never large enough to actually need these slots, but
	 * the branch is kept so the function is correct for all inputs. */
	if (formatted_offset >= 262144)
		return (formatted_offset >> 17) + 34;

	/* Note: this part here only works if:
	 *
	 *    2 <= formatted_offset < 655360
	 *
	 * It is < 655360 because the frequency of the position bases increases
	 * starting at the 655360 entry, and it is >= 2 because the below
	 * calculation fails if the most significant bit is lower than the 2's
	 * place. */
	wimlib_assert(formatted_offset >= 2 && formatted_offset < 655360);

	/* The slot is twice the index of the most significant set bit, plus
	 * the value of the next-most-significant bit. */
	unsigned mssb_idx = bsr32(formatted_offset);
	return (mssb_idx << 1) |
	       ((formatted_offset >> (mssb_idx - 1)) & 1);
}
127 static u32 lzx_record_literal(u8 literal, void *__main_freq_tab)
129 u32 *main_freq_tab = __main_freq_tab;
130 main_freq_tab[literal]++;
/* Equivalent to lzx_extra_bits[position_slot], except @position_slot must be
 * between 2 and 37.  Within that range, the number of extra bits follows the
 * regular pattern (position_slot / 2) - 1, so the table lookup can be
 * replaced by direct calculation. */
static inline unsigned lzx_get_num_extra_bits(unsigned position_slot)
{
	wimlib_assert(position_slot >= 2 && position_slot <= 37);
	return (position_slot >> 1) - 1;
}
145 /* Constructs a match from an offset and a length, and updates the LRU queue and
146 * the frequency of symbols in the main, length, and aligned offset alphabets.
147 * The return value is a 32-bit number that provides the match in an
148 * intermediate representation documented below. */
149 static u32 lzx_record_match(uint match_offset, uint match_len,
150 void *__freq_tabs, void *__queue)
152 struct lzx_freq_tables *freq_tabs = __freq_tabs;
153 struct lru_queue *queue = __queue;
154 uint formatted_offset;
156 uint position_footer = 0;
161 uint adjusted_match_len;
163 wimlib_assert(match_len >= LZX_MIN_MATCH && match_len <= LZX_MAX_MATCH);
164 wimlib_assert(match_offset != 0);
166 /* If possible, encode this offset as a repeated offset. */
167 if (match_offset == queue->R0) {
168 formatted_offset = 0;
170 } else if (match_offset == queue->R1) {
171 swap(queue->R0, queue->R1);
172 formatted_offset = 1;
174 } else if (match_offset == queue->R2) {
175 swap(queue->R0, queue->R2);
176 formatted_offset = 2;
179 /* Not a repeated offset. */
181 /* offsets of 0, 1, and 2 are reserved for the repeated offset
182 * codes, so non-repeated offsets must be encoded as 3+. The
183 * minimum offset is 1, so encode the offsets offset by 2. */
184 formatted_offset = match_offset + LZX_MIN_MATCH;
186 queue->R2 = queue->R1;
187 queue->R1 = queue->R0;
188 queue->R0 = match_offset;
190 /* The (now-formatted) offset will actually be encoded as a
191 * small position slot number that maps to a certain hard-coded
192 * offset (position base), followed by a number of extra bits---
193 * the position footer--- that are added to the position base to
194 * get the original formatted offset. */
196 position_slot = lzx_get_position_slot(formatted_offset);
197 position_footer = formatted_offset &
198 ((1 << lzx_get_num_extra_bits(position_slot)) - 1);
201 adjusted_match_len = match_len - LZX_MIN_MATCH;
203 /* Pack the position slot, position footer, and match length into an
204 * intermediate representation.
207 * ---- -----------------------------------------------------------
209 * 31 1 if a match, 0 if a literal.
211 * 30-25 position slot. This can be at most 50, so it will fit in 6
214 * 8-24 position footer. This is the offset of the real formatted
215 * offset from the position base. This can be at most 17 bits
216 * (since lzx_extra_bits[LZX_NUM_POSITION_SLOTS - 1] is 17).
218 * 0-7 length of match, offset by 2. This can be at most
219 * (LZX_MAX_MATCH - 2) == 255, so it will fit in 8 bits. */
221 (position_slot << 25) |
222 (position_footer << 8) |
223 (adjusted_match_len);
225 /* The match length must be at least 2, so let the adjusted match length
226 * be the match length minus 2.
228 * If it is less than 7, the adjusted match length is encoded as a 3-bit
229 * number offset by 2. Otherwise, the 3-bit length header is all 1's
230 * and the actual adjusted length is given as a symbol encoded with the
231 * length tree, offset by 7.
233 if (adjusted_match_len < LZX_NUM_PRIMARY_LENS) {
234 len_header = adjusted_match_len;
236 len_header = LZX_NUM_PRIMARY_LENS;
237 len_footer = adjusted_match_len - LZX_NUM_PRIMARY_LENS;
238 freq_tabs->len_freq_table[len_footer]++;
240 len_pos_header = (position_slot << 3) | len_header;
242 wimlib_assert(len_pos_header < LZX_MAINTREE_NUM_SYMBOLS - LZX_NUM_CHARS);
244 freq_tabs->main_freq_table[len_pos_header + LZX_NUM_CHARS]++;
247 * if (lzx_extra_bits[position_slot] >= 3) */
248 if (position_slot >= 8)
249 freq_tabs->aligned_freq_table[position_footer & 7]++;
255 * Writes a compressed literal match to the output.
257 * @out: The output bitstream.
258 * @block_type: The type of the block (LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM)
259 * @match: The match, encoded as a 32-bit number.
260 * @codes: Pointer to a structure that contains the codewords for the
261 * main, length, and aligned offset Huffman codes.
263 static int lzx_write_match(struct output_bitstream *out, int block_type,
264 u32 match, const struct lzx_codes *codes)
266 /* low 8 bits are the match length minus 2 */
267 uint match_len_minus_2 = match & 0xff;
268 /* Next 17 bits are the position footer */
269 uint position_footer = (match >> 8) & 0x1ffff; /* 17 bits */
270 /* Next 6 bits are the position slot. */
271 uint position_slot = (match >> 25) & 0x3f; /* 6 bits */
281 /* If the match length is less than MIN_MATCH (= 2) +
282 * NUM_PRIMARY_LENS (= 7), the length header contains
283 * the match length minus MIN_MATCH, and there is no
286 * Otherwise, the length header contains
287 * NUM_PRIMARY_LENS, and the length footer contains
288 * the match length minus NUM_PRIMARY_LENS minus
290 if (match_len_minus_2 < LZX_NUM_PRIMARY_LENS) {
291 len_header = match_len_minus_2;
292 /* No length footer-- mark it with a special
294 len_footer = (uint)(-1);
296 len_header = LZX_NUM_PRIMARY_LENS;
297 len_footer = match_len_minus_2 - LZX_NUM_PRIMARY_LENS;
300 /* Combine the position slot with the length header into
301 * a single symbol that will be encoded with the main
303 len_pos_header = (position_slot << 3) | len_header;
305 /* The actual main symbol is offset by LZX_NUM_CHARS because
306 * values under LZX_NUM_CHARS are used to indicate a literal
307 * byte rather than a match. */
308 main_symbol = len_pos_header + LZX_NUM_CHARS;
310 /* Output main symbol. */
311 ret = bitstream_put_bits(out, codes->main_codewords[main_symbol],
312 codes->main_lens[main_symbol]);
316 /* If there is a length footer, output it using the
317 * length Huffman code. */
318 if (len_footer != (uint)(-1)) {
319 ret = bitstream_put_bits(out, codes->len_codewords[len_footer],
320 codes->len_lens[len_footer]);
325 wimlib_assert(position_slot < LZX_NUM_POSITION_SLOTS);
327 num_extra_bits = lzx_extra_bits[position_slot];
329 /* For aligned offset blocks with at least 3 extra bits, output the
330 * verbatim bits literally, then the aligned bits encoded using the
331 * aligned offset tree. Otherwise, only the verbatim bits need to be
333 if ((block_type == LZX_BLOCKTYPE_ALIGNED) && (num_extra_bits >= 3)) {
335 verbatim_bits = position_footer >> 3;
336 ret = bitstream_put_bits(out, verbatim_bits,
341 aligned_bits = (position_footer & 7);
342 ret = bitstream_put_bits(out,
343 codes->aligned_codewords[aligned_bits],
344 codes->aligned_lens[aligned_bits]);
348 /* verbatim bits is the same as the position
349 * footer, in this case. */
350 ret = bitstream_put_bits(out, position_footer, num_extra_bits);
358 * Writes all compressed literals in a block, both matches and literal bytes, to
359 * the output bitstream.
361 * @out: The output bitstream.
362 * @block_type: The type of the block (LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM)
363 * @match_tab[]: The array of matches that will be output. It has length
364 * of @num_compressed_literals.
365 * @num_compressed_literals: Number of compressed literals to be output.
366 * @codes: Pointer to a structure that contains the codewords for the
367 * main, length, and aligned offset Huffman codes.
369 static int lzx_write_compressed_literals(struct output_bitstream *ostream,
371 const u32 match_tab[],
372 uint num_compressed_literals,
373 const struct lzx_codes *codes)
379 for (i = 0; i < num_compressed_literals; i++) {
380 match = match_tab[i];
382 /* High bit of the match indicates whether the match is an
383 * actual match (1) or a literal uncompressed byte (0) */
384 if (match & 0x80000000) {
386 ret = lzx_write_match(ostream, block_type, match,
392 wimlib_assert(match < LZX_NUM_CHARS);
393 ret = bitstream_put_bits(ostream,
394 codes->main_codewords[match],
395 codes->main_lens[match]);
404 * Writes a compressed Huffman tree to the output, preceded by the pretree for
407 * The Huffman tree is represented in the output as a series of path lengths
408 * from which the canonical Huffman code can be reconstructed. The path lengths
409 * themselves are compressed using a separate Huffman code, the pretree, which
410 * consists of LZX_PRETREE_NUM_SYMBOLS (= 20) symbols that cover all possible code
411 * lengths, plus extra codes for repeated lengths. The path lengths of the
412 * pretree precede the path lengths of the larger code and are uncompressed,
413 * consisting of 20 entries of 4 bits each.
415 * @out: The bitstream for the compressed output.
416 * @lens: The code lengths for the Huffman tree, indexed by symbol.
417 * @num_symbols: The number of symbols in the code.
419 static int lzx_write_compressed_tree(struct output_bitstream *out,
423 /* Frequencies of the length symbols, including the RLE symbols (NOT the
424 * actual lengths themselves). */
425 uint pretree_freqs[LZX_PRETREE_NUM_SYMBOLS];
426 u8 pretree_lens[LZX_PRETREE_NUM_SYMBOLS];
427 u16 pretree_codewords[LZX_PRETREE_NUM_SYMBOLS];
428 u8 output_syms[num_symbols * 2];
429 uint output_syms_idx;
433 uint additional_bits;
437 ZERO_ARRAY(pretree_freqs);
439 /* Since the code word lengths use a form of RLE encoding, the goal here
440 * is to find each run of identical lengths when going through them in
441 * symbol order (including runs of length 1). For each run, as many
442 * lengths are encoded using RLE as possible, and the rest are output
445 * output_syms[] will be filled in with the length symbols that will be
446 * output, including RLE codes, not yet encoded using the pre-tree.
448 * cur_run_len keeps track of how many code word lengths are in the
449 * current run of identical lengths.
453 for (i = 1; i <= num_symbols; i++) {
455 if (i != num_symbols && lens[i] == lens[i - 1]) {
456 /* Still in a run--- keep going. */
461 /* Run ended! Check if it is a run of zeroes or a run of
464 /* The symbol that was repeated in the run--- not to be confused
465 * with the length *of* the run (cur_run_len) */
466 len_in_run = lens[i - 1];
468 if (len_in_run == 0) {
469 /* A run of 0's. Encode it in as few length
470 * codes as we can. */
472 /* The magic length 18 indicates a run of 20 + n zeroes,
473 * where n is an uncompressed literal 5-bit integer that
474 * follows the magic length. */
475 while (cur_run_len >= 20) {
477 additional_bits = min(cur_run_len - 20, 0x1f);
479 output_syms[output_syms_idx++] = 18;
480 output_syms[output_syms_idx++] = additional_bits;
481 cur_run_len -= 20 + additional_bits;
484 /* The magic length 17 indicates a run of 4 + n zeroes,
485 * where n is an uncompressed literal 4-bit integer that
486 * follows the magic length. */
487 while (cur_run_len >= 4) {
488 additional_bits = min(cur_run_len - 4, 0xf);
490 output_syms[output_syms_idx++] = 17;
491 output_syms[output_syms_idx++] = additional_bits;
492 cur_run_len -= 4 + additional_bits;
497 /* A run of nonzero lengths. */
499 /* The magic length 19 indicates a run of 4 + n
500 * nonzeroes, where n is a literal bit that follows the
501 * magic length, and where the value of the lengths in
502 * the run is given by an extra length symbol, encoded
503 * with the pretree, that follows the literal bit.
505 * The extra length symbol is encoded as a difference
506 * from the length of the codeword for the first symbol
507 * in the run in the previous tree.
509 while (cur_run_len >= 4) {
510 additional_bits = (cur_run_len > 4);
511 delta = -(char)len_in_run;
515 pretree_freqs[(unsigned char)delta]++;
516 output_syms[output_syms_idx++] = 19;
517 output_syms[output_syms_idx++] = additional_bits;
518 output_syms[output_syms_idx++] = delta;
519 cur_run_len -= 4 + additional_bits;
523 /* Any remaining lengths in the run are outputted without RLE,
524 * as a difference from the length of that codeword in the
526 while (cur_run_len--) {
527 delta = -(char)len_in_run;
531 pretree_freqs[(unsigned char)delta]++;
532 output_syms[output_syms_idx++] = delta;
538 wimlib_assert(output_syms_idx < ARRAY_LEN(output_syms));
540 /* Build the pretree from the frequencies of the length symbols. */
542 make_canonical_huffman_code(LZX_PRETREE_NUM_SYMBOLS,
543 LZX_MAX_CODEWORD_LEN,
544 pretree_freqs, pretree_lens,
547 /* Write the lengths of the pretree codes to the output. */
548 for (i = 0; i < LZX_PRETREE_NUM_SYMBOLS; i++)
549 bitstream_put_bits(out, pretree_lens[i],
550 LZX_PRETREE_ELEMENT_SIZE);
552 /* Write the length symbols, encoded with the pretree, to the output. */
555 while (i < output_syms_idx) {
556 pretree_sym = output_syms[i++];
558 bitstream_put_bits(out, pretree_codewords[pretree_sym],
559 pretree_lens[pretree_sym]);
560 switch (pretree_sym) {
562 bitstream_put_bits(out, output_syms[i++], 4);
565 bitstream_put_bits(out, output_syms[i++], 5);
568 bitstream_put_bits(out, output_syms[i++], 1);
569 bitstream_put_bits(out,
570 pretree_codewords[output_syms[i]],
571 pretree_lens[output_syms[i]]);
581 /* Builds the canonical Huffman code for the main tree, the length tree, and the
582 * aligned offset tree. */
583 static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs,
584 struct lzx_codes *codes)
586 make_canonical_huffman_code(LZX_MAINTREE_NUM_SYMBOLS,
587 LZX_MAX_CODEWORD_LEN,
588 freq_tabs->main_freq_table,
590 codes->main_codewords);
592 make_canonical_huffman_code(LZX_LENTREE_NUM_SYMBOLS,
593 LZX_MAX_CODEWORD_LEN,
594 freq_tabs->len_freq_table,
596 codes->len_codewords);
598 make_canonical_huffman_code(LZX_ALIGNEDTREE_NUM_SYMBOLS, 8,
599 freq_tabs->aligned_freq_table,
601 codes->aligned_codewords);
604 /* Do the 'E8' preprocessing, where the targets of x86 CALL instructions were
605 * changed from relative offsets to absolute offsets. This type of
606 * preprocessing can be used on any binary data even if it is not actually
607 * machine code. It seems to always be used in WIM files, even though there is
608 * no bit to indicate that it actually is used, unlike in the LZX compressed
609 * format as used in other file formats such as the cabinet format, where a bit
610 * is reserved for that purpose. */
611 static void do_call_insn_preprocessing(u8 uncompressed_data[],
612 uint uncompressed_data_len)
615 int file_size = LZX_MAGIC_FILESIZE;
619 /* Not enabled in the last 6 bytes, which means the 5-byte call
620 * instruction cannot start in the last *10* bytes. */
621 while (i < uncompressed_data_len - 10) {
622 if (uncompressed_data[i] != 0xe8) {
626 rel_offset = le32_to_cpu(*(int32_t*)(uncompressed_data + i + 1));
628 if (rel_offset >= -i && rel_offset < file_size) {
629 if (rel_offset < file_size - i) {
630 /* "good translation" */
631 abs_offset = rel_offset + i;
633 /* "compensating translation" */
634 abs_offset = rel_offset - file_size;
636 *(int32_t*)(uncompressed_data + i + 1) = cpu_to_le32(abs_offset);
643 static const struct lz_params lzx_lz_params = {
645 /* LZX_MIN_MATCH == 2, but 2-character matches are rarely useful; the
646 * minimum match for compression is set to 3 instead. */
649 .max_match = LZX_MAX_MATCH,
650 .good_match = LZX_MAX_MATCH,
651 .nice_match = LZX_MAX_MATCH,
652 .max_chain_len = LZX_MAX_MATCH,
653 .max_lazy_match = LZX_MAX_MATCH,
658 * Performs LZX compression on a block of data.
660 * @__uncompressed_data: Pointer to the data to be compressed.
661 * @uncompressed_len: Length, in bytes, of the data to be compressed.
662 * @compressed_data: Pointer to a location at least (@uncompressed_len - 1)
663 * bytes long into which the compressed data may be
665 * @compressed_len_ret: A pointer to an unsigned int into which the length of
666 * the compressed data may be returned.
668 * Returns zero if compression was successfully performed. In that case
669 * @compressed_data and @compressed_len_ret will contain the compressed data and
670 * its length. A return value of nonzero means that compressing the data did
671 * not reduce its size, and @compressed_data will not contain the full
674 int lzx_compress(const void *__uncompressed_data, uint uncompressed_len,
675 void *compressed_data, uint *compressed_len_ret)
677 struct output_bitstream ostream;
678 u8 uncompressed_data[uncompressed_len + LZX_MAX_MATCH];
679 struct lzx_freq_tables freq_tabs;
680 struct lzx_codes codes;
681 u32 match_tab[uncompressed_len];
682 struct lru_queue queue = {.R0 = 1, .R1 = 1, .R2 = 1};
687 int block_type = LZX_BLOCKTYPE_ALIGNED;
689 LZX_DEBUG("uncompressed_len = %u", uncompressed_len);
691 if (uncompressed_len < 100)
695 memset(&freq_tabs, 0, sizeof(freq_tabs));
697 /* The input data must be preprocessed. To avoid changing the original
698 * input, copy it to a temporary buffer. */
699 memcpy(uncompressed_data, __uncompressed_data, uncompressed_len);
702 /* Before doing any actual compression, do the call instruction (0xe8
703 * byte) translation on the uncompressed data. */
704 do_call_insn_preprocessing(uncompressed_data, uncompressed_len);
707 /* Determine the sequence of matches and literals that will be output,
708 * and in the process, keep counts of the number of times each symbol
709 * will be output, so that the Huffman trees can be made. */
711 num_matches = lz_analyze_block(uncompressed_data, uncompressed_len,
712 match_tab, lzx_record_match,
713 lzx_record_literal, &freq_tabs,
714 &queue, freq_tabs.main_freq_table,
717 LZX_DEBUG("using %u matches", num_matches);
719 lzx_make_huffman_codes(&freq_tabs, &codes);
721 /* Initialize the output bitstream. */
722 init_output_bitstream(&ostream, compressed_data, uncompressed_len - 1);
724 /* The first three bits tell us what kind of block it is, and are one
725 * of the LZX_BLOCKTYPE_* values. */
726 bitstream_put_bits(&ostream, block_type, 3);
728 /* The next bit indicates whether the block size is the default (32768),
729 * indicated by a 1 bit, or whether the block size is given by the next
730 * 16 bits, indicated by a 0 bit. */
731 if (uncompressed_len == 32768) {
732 bitstream_put_bits(&ostream, 1, 1);
734 bitstream_put_bits(&ostream, 0, 1);
735 bitstream_put_bits(&ostream, uncompressed_len, 16);
738 /* Write out the aligned offset tree. Note that M$ lies and says that
739 * the aligned offset tree comes after the length tree, but that is
740 * wrong; it actually is before the main tree. */
741 if (block_type == LZX_BLOCKTYPE_ALIGNED)
742 for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++)
743 bitstream_put_bits(&ostream, codes.aligned_lens[i],
744 LZX_ALIGNEDTREE_ELEMENT_SIZE);
746 /* Write the pre-tree and lengths for the first LZX_NUM_CHARS symbols in the
748 ret = lzx_write_compressed_tree(&ostream, codes.main_lens,
753 /* Write the pre-tree and symbols for the rest of the main tree. */
754 ret = lzx_write_compressed_tree(&ostream, codes.main_lens +
756 LZX_MAINTREE_NUM_SYMBOLS -
761 /* Write the pre-tree and symbols for the length tree. */
762 ret = lzx_write_compressed_tree(&ostream, codes.len_lens,
763 LZX_LENTREE_NUM_SYMBOLS);
767 /* Write the compressed literals. */
768 ret = lzx_write_compressed_literals(&ostream, block_type,
769 match_tab, num_matches, &codes);
773 ret = flush_output_bitstream(&ostream);
777 compressed_len = ostream.bit_output - (u8*)compressed_data;
779 LZX_DEBUG("Compressed %u => %u bytes",
780 uncompressed_len, compressed_len);
782 *compressed_len_ret = compressed_len;
784 #ifdef ENABLE_VERIFY_COMPRESSION
785 /* Verify that we really get the same thing back when decompressing. */
786 LZX_DEBUG("Verifying the compressed data.");
787 u8 buf[uncompressed_len];
788 ret = lzx_decompress(compressed_data, compressed_len, buf,
791 ERROR("lzx_compress(): Failed to decompress data we compressed");
795 for (i = 0; i < uncompressed_len; i++) {
796 if (buf[i] != *((u8*)__uncompressed_data + i)) {
797 ERROR("lzx_compress(): Data we compressed didn't "
798 "decompress to the original data (difference at "
799 "byte %u of %u)", i + 1, uncompressed_len);
803 LZX_DEBUG("Compression verified to be correct.");