4 * A compressor that produces output compatible with the XPRESS (Huffman
5 * version) compression format.
9 * Copyright (C) 2012, 2013, 2014 Eric Biggers
11 * This file is free software; you can redistribute it and/or modify it under
12 * the terms of the GNU Lesser General Public License as published by the Free
13 * Software Foundation; either version 3 of the License, or (at your option) any
16 * This file is distributed in the hope that it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
21 * You should have received a copy of the GNU Lesser General Public License
22 * along with this file; if not, see http://www.gnu.org/licenses/.
29 #include "wimlib/compress_common.h"
30 #include "wimlib/compressor_ops.h"
31 #include "wimlib/endianness.h"
32 #include "wimlib/error.h"
33 #include "wimlib/lz_mf.h"
34 #include "wimlib/util.h"
35 #include "wimlib/xpress.h"
40 #define XPRESS_CACHE_PER_POS 8
41 #define XPRESS_OPTIM_ARRAY_LENGTH 4096
43 struct xpress_compressor;
45 struct xpress_mc_pos_data;
47 /* Internal compression parameters */
48 struct xpress_compressor_params {
50 /* See xpress_choose_items() */
51 u32 (*choose_items_func)(struct xpress_compressor *);
53 /* For near-optimal parsing only */
56 /* Match-finding algorithm and parameters */
57 enum lz_mf_algo mf_algo;
59 u32 nice_match_length;
62 /* State of the XPRESS compressor */
63 struct xpress_compressor {
65 /* Internal compression parameters */
66 struct xpress_compressor_params params;
68 /* Data currently being compressed */
72 /* Lempel-Ziv match-finder */
75 /* Optimal parsing data */
76 unsigned (*get_matches_func)(struct xpress_compressor *,
77 const struct lz_match **);
78 void (*skip_bytes_func)(struct xpress_compressor *, unsigned n);
79 struct lz_match *cached_matches;
80 struct lz_match *cache_ptr;
81 struct lz_match *cache_limit;
82 struct xpress_mc_pos_data *optimum;
83 u8 costs[XPRESS_NUM_SYMBOLS];
85 /* The selected sequence of matches/literals */
86 struct xpress_item *chosen_items;
88 /* Symbol frequency counters */
89 u32 freqs[XPRESS_NUM_SYMBOLS];
91 /* The current Huffman code */
92 u32 codewords[XPRESS_NUM_SYMBOLS];
93 u8 lens[XPRESS_NUM_SYMBOLS];
96 /* Intermediate XPRESS match/literal format */
100 * Bits 9 - 24: Length - XPRESS_MIN_MATCH_LEN
101 * Bits 25 - 28: Number of extra offset bits
102 * Bits 29+ : Extra offset bits */
108 * Match chooser position data:
110 * An array of these structures is used during the near-optimal match-choosing
111 * algorithm. They correspond to consecutive positions in the window and are
112 * used to keep track of the cost to reach each position, and the match/literal
113 * choices that need to be chosen to reach that position.
115 struct xpress_mc_pos_data {
117 /* The cost, in bits, of the lowest-cost path that has been found to
118 * reach this position. This can change as progressively lower cost
119 * paths are found to reach this position. */
121 #define MC_INFINITE_COST UINT32_MAX
123 /* The match or literal that was taken to reach this position. This can
124 * change as progressively lower cost paths are found to reach this
127 * This variable is divided into two bitfields.
130 * Low bits are 1, high bits are the literal.
133 * Low bits are the match length, high bits are the offset.
136 #define MC_OFFSET_SHIFT 16
137 #define MC_LEN_MASK (((u32)1 << MC_OFFSET_SHIFT) - 1)
142 * Structure to keep track of the current state of sending data to the
143 * compressed output buffer.
145 * The XPRESS bitstream is encoded as a sequence of little endian 16-bit coding
146 * units interwoven with literal bytes.
148 struct xpress_output_bitstream {
150 /* Bits that haven't yet been written to the output buffer. */
153 /* Number of bits currently held in @bitbuf. */
156 /* Pointer to the start of the output buffer. */
159 /* Pointer to the location in the ouput buffer at which to write the
163 /* Pointer to the location in the output buffer at which to write the
164 * next 16 bits, after @next_bits. */
167 /* Pointer to the location in the output buffer at which to write the
168 * next literal byte. */
171 /* Pointer to the end of the output buffer. */
176 * Initialize the output bitstream.
179 * The output bitstream structure to initialize.
181 * The buffer to write to.
183 * Size of @buffer, in bytes. Must be at least 4.
186 xpress_init_output(struct xpress_output_bitstream *os, void *buffer, u32 size)
191 os->next_bits = os->start;
192 os->next_bits2 = os->start + 2;
193 os->next_byte = os->start + 4;
194 os->end = os->start + size;
198 * Write some bits to the output bitstream.
200 * The bits are given by the low-order @num_bits bits of @bits. Higher-order
201 * bits in @bits cannot be set. At most 16 bits can be written at once.
203 * If the output buffer space is exhausted, then the bits will be ignored, and
204 * xpress_flush_output() will return 0 when it gets called.
207 xpress_write_bits(struct xpress_output_bitstream *os,
208 const u32 bits, const unsigned int num_bits)
210 /* This code is optimized for XPRESS, which never needs to write more
211 * than 16 bits at once. */
213 os->bitcount += num_bits;
214 os->bitbuf = (os->bitbuf << num_bits) | bits;
216 if (os->bitcount > 16) {
218 if (os->end - os->next_byte >= 2) {
219 *(le16 *)os->next_bits = cpu_to_le16(os->bitbuf >> os->bitcount);
220 os->next_bits = os->next_bits2;
221 os->next_bits2 = os->next_byte;
228 * Interweave a literal byte into the output bitstream.
231 xpress_write_byte(struct xpress_output_bitstream *os, u8 byte)
233 if (os->next_byte < os->end)
234 *os->next_byte++ = byte;
238 * Flush the last coding unit to the output buffer if needed. Return the total
239 * number of bytes written to the output buffer, or 0 if an overflow occurred.
242 xpress_flush_output(struct xpress_output_bitstream *os)
244 if (unlikely(os->end - os->next_byte < 2))
247 *(le16 *)os->next_bits = cpu_to_le16(os->bitbuf << (16 - os->bitcount));
248 *(le16 *)os->next_bits2 = cpu_to_le16(0);
250 return os->next_byte - os->start;
253 /* Output a match or literal. */
255 xpress_write_item(struct xpress_item item, struct xpress_output_bitstream *os,
256 const u32 codewords[], const u8 lens[])
258 u64 data = item.data;
260 unsigned adjusted_len;
261 unsigned num_extra_bits;
264 symbol = data & 0x1FF;
266 xpress_write_bits(os, codewords[symbol], lens[symbol]);
268 if (symbol < XPRESS_NUM_CHARS) /* Literal? */
271 adjusted_len = (data >> 9) & 0xFFFF;
273 /* If length >= 18, one extra length byte.
274 * If length >= 273, three (total) extra length bytes. */
275 if (adjusted_len >= 0xf) {
276 u8 byte1 = min(adjusted_len - 0xf, 0xff);
277 xpress_write_byte(os, byte1);
279 xpress_write_byte(os, adjusted_len & 0xff);
280 xpress_write_byte(os, adjusted_len >> 8);
284 num_extra_bits = (data >> 25) & 0xF;
285 extra_bits = data >> 29;
287 xpress_write_bits(os, extra_bits, num_extra_bits);
290 /* Output a sequence of XPRESS matches and literals. */
292 xpress_write_items(struct xpress_output_bitstream *os,
293 const struct xpress_item items[], u32 num_items,
294 const u32 codewords[], const u8 lens[])
296 for (u32 i = 0; i < num_items; i++)
297 xpress_write_item(items[i], os, codewords, lens);
299 /* End-of-data symbol (required for MS compatibility) */
300 xpress_write_bits(os, codewords[XPRESS_END_OF_DATA], lens[XPRESS_END_OF_DATA]);
303 /* Make the Huffman code for XPRESS.
305 * Takes as input c->freqs and produces as output c->lens and c->codewords. */
307 xpress_make_huffman_code(struct xpress_compressor *c)
309 make_canonical_huffman_code(XPRESS_NUM_SYMBOLS, XPRESS_MAX_CODEWORD_LEN,
310 c->freqs, c->lens, c->codewords);
313 /* Tally, and optionally record, the specified literal byte. */
315 xpress_declare_literal(struct xpress_compressor *c, unsigned literal,
316 struct xpress_item **next_chosen_item)
320 if (next_chosen_item) {
321 *(*next_chosen_item)++ = (struct xpress_item) {
327 /* Tally, and optionally record, the specified match. */
329 xpress_declare_match(struct xpress_compressor *c,
330 unsigned len, unsigned offset,
331 struct xpress_item **next_chosen_item)
333 unsigned adjusted_len = len - XPRESS_MIN_MATCH_LEN;
334 unsigned len_hdr = min(adjusted_len, 0xf);
335 unsigned offset_bsr = bsr32(offset);
336 unsigned sym = XPRESS_NUM_CHARS + ((offset_bsr << 4) | len_hdr);
340 if (next_chosen_item) {
341 *(*next_chosen_item)++ = (struct xpress_item) {
343 ((u64)adjusted_len << 9) |
344 ((u64)offset_bsr << 25) |
345 ((u64)(offset ^ (1U << offset_bsr)) << 29),
350 /* Tally, and optionally record, the specified match or literal. */
352 xpress_declare_item(struct xpress_compressor *c, u32 mc_item_data,
353 struct xpress_item **next_chosen_item)
355 unsigned len = mc_item_data & MC_LEN_MASK;
356 unsigned offset_data = mc_item_data >> MC_OFFSET_SHIFT;
359 xpress_declare_literal(c, offset_data, next_chosen_item);
361 xpress_declare_match(c, len, offset_data, next_chosen_item);
365 xpress_record_item_list(struct xpress_compressor *c,
366 struct xpress_mc_pos_data *cur_optimum_ptr,
367 struct xpress_item **next_chosen_item)
369 struct xpress_mc_pos_data *end_optimum_ptr;
373 /* The list is currently in reverse order (last item to first item).
375 end_optimum_ptr = cur_optimum_ptr;
376 saved_item = cur_optimum_ptr->mc_item_data;
379 cur_optimum_ptr -= item & MC_LEN_MASK;
380 saved_item = cur_optimum_ptr->mc_item_data;
381 cur_optimum_ptr->mc_item_data = item;
382 } while (cur_optimum_ptr != c->optimum);
384 /* Walk the list of items from beginning to end, tallying and recording
387 xpress_declare_item(c, cur_optimum_ptr->mc_item_data, next_chosen_item);
388 cur_optimum_ptr += (cur_optimum_ptr->mc_item_data) & MC_LEN_MASK;
389 } while (cur_optimum_ptr != end_optimum_ptr);
393 xpress_tally_item_list(struct xpress_compressor *c,
394 struct xpress_mc_pos_data *cur_optimum_ptr)
396 /* Since we're just tallying the items, we don't need to reverse the
397 * list. Processing the items in reverse order is fine. */
399 xpress_declare_item(c, cur_optimum_ptr->mc_item_data, NULL);
400 cur_optimum_ptr -= (cur_optimum_ptr->mc_item_data & MC_LEN_MASK);
401 } while (cur_optimum_ptr != c->optimum);
404 /* Tally, and optionally (if next_chosen_item != NULL) record, in order, all
405 * items in the current list of items found by the match-chooser. */
407 xpress_declare_item_list(struct xpress_compressor *c,
408 struct xpress_mc_pos_data *cur_optimum_ptr,
409 struct xpress_item **next_chosen_item)
411 if (next_chosen_item)
412 xpress_record_item_list(c, cur_optimum_ptr, next_chosen_item);
414 xpress_tally_item_list(c, cur_optimum_ptr);
418 xpress_get_matches_fillcache(struct xpress_compressor *c,
419 const struct lz_match **matches_ret)
421 struct lz_match *cache_ptr;
422 struct lz_match *matches;
423 unsigned num_matches;
425 cache_ptr = c->cache_ptr;
426 matches = cache_ptr + 1;
427 if (likely(cache_ptr <= c->cache_limit)) {
428 num_matches = lz_mf_get_matches(c->mf, matches);
429 cache_ptr->len = num_matches;
430 c->cache_ptr = matches + num_matches;
434 *matches_ret = matches;
439 xpress_get_matches_usecache(struct xpress_compressor *c,
440 const struct lz_match **matches_ret)
442 struct lz_match *cache_ptr;
443 struct lz_match *matches;
444 unsigned num_matches;
446 cache_ptr = c->cache_ptr;
447 matches = cache_ptr + 1;
448 if (cache_ptr <= c->cache_limit) {
449 num_matches = cache_ptr->len;
450 c->cache_ptr = matches + num_matches;
454 *matches_ret = matches;
459 xpress_get_matches_usecache_nocheck(struct xpress_compressor *c,
460 const struct lz_match **matches_ret)
462 struct lz_match *cache_ptr;
463 struct lz_match *matches;
464 unsigned num_matches;
466 cache_ptr = c->cache_ptr;
467 matches = cache_ptr + 1;
468 num_matches = cache_ptr->len;
469 c->cache_ptr = matches + num_matches;
470 *matches_ret = matches;
475 xpress_get_matches_noncaching(struct xpress_compressor *c,
476 const struct lz_match **matches_ret)
478 *matches_ret = c->cached_matches;
479 return lz_mf_get_matches(c->mf, c->cached_matches);
483 * Find matches at the next position in the window.
485 * This uses a wrapper function around the underlying match-finder.
487 * Returns the number of matches found and sets *matches_ret to point to the
488 * matches array. The matches will be sorted by strictly increasing length and
491 static inline unsigned
492 xpress_get_matches(struct xpress_compressor *c,
493 const struct lz_match **matches_ret)
495 return (*c->get_matches_func)(c, matches_ret);
499 xpress_skip_bytes_fillcache(struct xpress_compressor *c, unsigned n)
501 struct lz_match *cache_ptr;
503 cache_ptr = c->cache_ptr;
504 lz_mf_skip_positions(c->mf, n);
505 if (cache_ptr <= c->cache_limit) {
509 } while (--n && likely(cache_ptr <= c->cache_limit));
511 c->cache_ptr = cache_ptr;
515 xpress_skip_bytes_usecache(struct xpress_compressor *c, unsigned n)
517 struct lz_match *cache_ptr;
519 cache_ptr = c->cache_ptr;
520 if (likely(cache_ptr <= c->cache_limit)) {
522 cache_ptr += 1 + cache_ptr->len;
523 } while (--n && likely(cache_ptr <= c->cache_limit));
525 c->cache_ptr = cache_ptr;
529 xpress_skip_bytes_usecache_nocheck(struct xpress_compressor *c, unsigned n)
531 struct lz_match *cache_ptr;
533 cache_ptr = c->cache_ptr;
535 cache_ptr += 1 + cache_ptr->len;
537 c->cache_ptr = cache_ptr;
541 xpress_skip_bytes_noncaching(struct xpress_compressor *c, unsigned n)
543 lz_mf_skip_positions(c->mf, n);
547 * Skip the specified number of positions in the window (don't search for
550 * This uses a wrapper function around the underlying match-finder.
553 xpress_skip_bytes(struct xpress_compressor *c, unsigned n)
555 return (*c->skip_bytes_func)(c, n);
558 /* Set default XPRESS Huffman symbol costs to bootstrap the iterative
559 * optimization algorithm. */
561 xpress_set_default_costs(u8 costs[])
565 /* Literal symbols */
566 for (i = 0; i < XPRESS_NUM_CHARS; i++)
570 for (; i < XPRESS_NUM_SYMBOLS; i++)
574 /* Copy the Huffman codeword lengths array @lens to the Huffman symbol costs
575 * array @costs, but also assign a default cost to each 0-length (unused)
578 xpress_set_costs(u8 costs[], const u8 lens[])
580 for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS; i++)
581 costs[i] = lens[i] ? lens[i] : XPRESS_MAX_CODEWORD_LEN;
585 * Consider coding each match in @matches.
587 * @matches must be sorted by strictly increasing length and strictly
588 * increasing offset. This is guaranteed by the match-finder.
590 * We consider each length from the minimum (3) to the longest
591 * (matches[num_matches - 1].len). For each length, we consider only
592 * the smallest offset for which that length is available. Although
593 * this is not guaranteed to be optimal due to the possibility of a
594 * larger offset costing less than a smaller offset to code, this is a
595 * very useful heuristic.
598 xpress_consider_matches(struct xpress_compressor *c,
599 struct xpress_mc_pos_data *cur_optimum_ptr,
600 const struct lz_match matches[],
601 unsigned num_matches)
604 unsigned len = XPRESS_MIN_MATCH_LEN;
609 unsigned adjusted_len;
613 if (matches[num_matches - 1].len < 0xf + XPRESS_MIN_MATCH_LEN) {
614 /* All lengths are small. Optimize accordingly. */
616 offset = matches[i].offset;
617 offset_bsr = bsr32(offset);
618 len_hdr = len - XPRESS_MIN_MATCH_LEN;
619 sym = XPRESS_NUM_CHARS + ((offset_bsr << 4) | len_hdr);
621 position_cost = cur_optimum_ptr->cost + offset_bsr;
623 cost = position_cost + c->costs[sym];
624 if (cost < (cur_optimum_ptr + len)->cost) {
625 (cur_optimum_ptr + len)->cost = cost;
626 (cur_optimum_ptr + len)->mc_item_data =
627 (offset << MC_OFFSET_SHIFT) | len;
630 } while (++len <= matches[i].len);
631 } while (++i != num_matches);
633 /* Some lengths are big. */
635 offset = matches[i].offset;
636 offset_bsr = bsr32(offset);
637 position_cost = cur_optimum_ptr->cost + offset_bsr;
639 adjusted_len = len - XPRESS_MIN_MATCH_LEN;
640 len_hdr = min(adjusted_len, 0xf);
641 sym = XPRESS_NUM_CHARS + ((offset_bsr << 4) | len_hdr);
643 cost = position_cost + c->costs[sym];
644 if (adjusted_len >= 0xf) {
646 if (adjusted_len - 0xf >= 0xff)
650 if (cost < (cur_optimum_ptr + len)->cost) {
651 (cur_optimum_ptr + len)->cost = cost;
652 (cur_optimum_ptr + len)->mc_item_data =
653 (offset << MC_OFFSET_SHIFT) | len;
655 } while (++len <= matches[i].len);
656 } while (++i != num_matches);
661 * The main near-optimal parsing routine.
663 * Briefly, the algorithm does an approximate minimum-cost path search to find a
664 * "near-optimal" sequence of matches and literals to output, based on the
665 * current cost model. The algorithm steps forward, position by position (byte
666 * by byte), and updates the minimum cost path to reach each later position that
667 * can be reached using a match or literal from the current position. This is
668 * essentially Dijkstra's algorithm in disguise: the graph nodes are positions,
669 * the graph edges are possible matches/literals to code, and the cost of each
670 * edge is the estimated number of bits that will be required to output the
671 * corresponding match or literal. But one difference is that we actually
672 * compute the lowest-cost path in pieces, where each piece is terminated when
673 * there are no choices to be made.
675 * If next_chosen_item != NULL, then all items chosen will be recorded (saved in
676 * the chosen_items array). Otherwise, all items chosen will only be tallied
677 * (symbol frequencies tallied in c->freqs).
680 xpress_optim_pass(struct xpress_compressor *c,
681 struct xpress_item **next_chosen_item)
683 const u8 *window_end;
684 const u8 *window_ptr;
685 struct xpress_mc_pos_data *cur_optimum_ptr;
686 struct xpress_mc_pos_data *end_optimum_ptr;
687 const struct lz_match *matches;
688 unsigned num_matches;
689 unsigned longest_len;
693 window_ptr = c->cur_window;
694 window_end = &c->cur_window[c->cur_window_size];
697 /* Start building a new list of items, which will correspond to the next
698 * piece of the overall minimum-cost path. */
700 if (window_ptr == window_end)
703 cur_optimum_ptr = c->optimum;
704 cur_optimum_ptr->cost = 0;
705 end_optimum_ptr = cur_optimum_ptr;
707 /* The following loop runs once for each per byte in the window, except
708 * in a couple shortcut cases. */
711 /* Find matches with the current position. */
712 num_matches = xpress_get_matches(c, &matches);
716 longest_len = matches[num_matches - 1].len;
718 /* If there's a very long match, choose it immediately.
720 if (longest_len >= c->params.nice_match_length) {
722 xpress_skip_bytes(c, longest_len - 1);
723 window_ptr += longest_len;
725 if (cur_optimum_ptr != c->optimum)
726 xpress_declare_item_list(c, cur_optimum_ptr,
729 xpress_declare_match(c, longest_len,
730 matches[num_matches - 1].offset,
735 /* If reaching any positions for the first time,
736 * initialize their costs to "infinity". */
737 while (end_optimum_ptr < cur_optimum_ptr + longest_len)
738 (++end_optimum_ptr)->cost = MC_INFINITE_COST;
740 /* Consider coding a match. */
741 xpress_consider_matches(c, cur_optimum_ptr,
742 matches, num_matches);
744 /* No matches found. The only choice at this position
745 * is to code a literal. */
747 if (end_optimum_ptr == cur_optimum_ptr) {
749 /* Optimization for single literals. */
750 if (likely(cur_optimum_ptr == c->optimum)) {
751 xpress_declare_literal(c, *window_ptr++,
753 if (window_ptr == window_end)
758 (++end_optimum_ptr)->cost = MC_INFINITE_COST;
762 /* Consider coding a literal. */
763 literal = *window_ptr++;
764 cost = cur_optimum_ptr->cost + c->costs[literal];
765 if (cost < (cur_optimum_ptr + 1)->cost) {
766 (cur_optimum_ptr + 1)->cost = cost;
767 (cur_optimum_ptr + 1)->mc_item_data =
768 ((u32)literal << MC_OFFSET_SHIFT) | 1;
771 /* Advance to the next position. */
775 * This loop will terminate when either of the following
776 * conditions is true:
778 * (1) cur_optimum_ptr == end_optimum_ptr
780 * There are no paths that extend beyond the current
781 * position. In this case, any path to a later position
782 * must pass through the current position, so we can go
783 * ahead and choose the list of items that led to this
786 * (2) cur_optimum_ptr == &c->optimum[XPRESS_OPTIM_ARRAY_LENGTH]
788 * This bounds the number of times the algorithm can step
789 * forward before it is guaranteed to start choosing items.
790 * This limits the memory usage. But
791 * XPRESS_OPTIM_ARRAY_LENGTH is high enough that on most
792 * inputs this limit is never reached.
794 * Note: no check for end-of-window is needed because
795 * end-of-window will trigger condition (1).
797 if (cur_optimum_ptr == end_optimum_ptr ||
798 cur_optimum_ptr == &c->optimum[XPRESS_OPTIM_ARRAY_LENGTH])
802 /* Choose the current list of items that constitute the minimum-cost
803 * path to the current position. */
804 xpress_declare_item_list(c, cur_optimum_ptr, next_chosen_item);
808 /* Near-optimal parsing */
810 xpress_choose_near_optimal_items(struct xpress_compressor *c)
812 u32 num_passes_remaining = c->params.num_optim_passes;
813 struct xpress_item *next_chosen_item;
814 struct xpress_item **next_chosen_item_ptr;
816 /* Choose appropriate match-finder wrapper functions. */
817 if (c->params.num_optim_passes > 1) {
818 c->get_matches_func = xpress_get_matches_fillcache;
819 c->skip_bytes_func = xpress_skip_bytes_fillcache;
821 c->get_matches_func = xpress_get_matches_noncaching;
822 c->skip_bytes_func = xpress_skip_bytes_noncaching;
825 /* The first optimization pass will use a default cost model. Each
826 * additional optimization pass will use a cost model computed from the
829 * To improve performance, we only generate the array containing the
830 * matches and literals in intermediate form on the final pass. For
831 * earlier passes, tallying symbol frequencies is sufficient. */
832 xpress_set_default_costs(c->costs);
834 next_chosen_item_ptr = NULL;
836 /* Reset the match-finder wrapper. */
837 c->cache_ptr = c->cached_matches;
839 if (num_passes_remaining == 1) {
840 /* Last pass: actually generate the items. */
841 next_chosen_item = c->chosen_items;
842 next_chosen_item_ptr = &next_chosen_item;
845 /* Choose the items. */
846 xpress_optim_pass(c, next_chosen_item_ptr);
848 if (num_passes_remaining > 1) {
849 /* This isn't the last pass. */
851 /* Make the Huffman code from the symbol frequencies. */
852 c->freqs[XPRESS_END_OF_DATA]++;
853 xpress_make_huffman_code(c);
855 /* Reset symbol frequencies. */
856 memset(c->freqs, 0, sizeof(c->freqs));
858 /* Update symbol costs. */
859 xpress_set_costs(c->costs, c->lens);
861 /* Choose appopriate match-finder wrapper functions. */
862 if (c->cache_ptr <= c->cache_limit) {
863 c->get_matches_func = xpress_get_matches_usecache_nocheck;
864 c->skip_bytes_func = xpress_skip_bytes_usecache_nocheck;
866 c->get_matches_func = xpress_get_matches_usecache;
867 c->skip_bytes_func = xpress_skip_bytes_usecache;
870 } while (--num_passes_remaining);
872 /* Return the number of items chosen. */
873 return next_chosen_item - c->chosen_items;
878 xpress_choose_lazy_items(struct xpress_compressor *c)
880 const u8 *window_ptr = c->cur_window;
881 const u8 *window_end = &c->cur_window[c->cur_window_size];
882 struct xpress_item *next_chosen_item = c->chosen_items;
884 struct lz_mf *mf = c->mf;
885 struct lz_match *matches = c->cached_matches;
886 unsigned num_matches;
887 struct lz_match prev_match;
889 if (c->cur_window_size <= 8192)
890 len_3_too_far = 2048;
892 len_3_too_far = 4096;
895 /* Don't have match at previous position */
897 num_matches = lz_mf_get_matches(mf, matches);
900 if (num_matches == 0 ||
901 (matches[num_matches - 1].len == 3 &&
902 matches[num_matches - 1].offset >= len_3_too_far))
904 /* No matches found => output literal */
905 xpress_declare_literal(c, *(window_ptr - 1),
910 prev_match = matches[num_matches - 1];
913 /* Have match at previous position */
915 if (prev_match.len >= c->params.nice_match_length) {
916 /* Very long match found => output immediately */
917 xpress_declare_match(c, prev_match.len,
920 lz_mf_skip_positions(mf, prev_match.len - 1);
921 window_ptr += prev_match.len - 1;
925 num_matches = lz_mf_get_matches(mf, matches);
928 if (num_matches == 0 ||
929 (matches[num_matches - 1].len <= prev_match.len))
931 /* Next match is not longer => output previous match */
932 xpress_declare_match(c, prev_match.len,
935 lz_mf_skip_positions(mf, prev_match.len - 2);
936 window_ptr += prev_match.len - 2;
940 /* Next match is longer => output literal */
942 xpress_declare_literal(c, *(window_ptr - 2), &next_chosen_item);
944 prev_match = matches[num_matches - 1];
946 goto have_prev_match;
948 } while (window_ptr != window_end);
950 return next_chosen_item - c->chosen_items;
955 xpress_choose_greedy_items(struct xpress_compressor *c)
957 const u8 *window_ptr = c->cur_window;
958 const u8 *window_end = &c->cur_window[c->cur_window_size];
959 struct xpress_item *next_chosen_item = c->chosen_items;
961 struct lz_mf *mf = c->mf;
962 struct lz_match *matches = c->cached_matches;
963 unsigned num_matches;
965 if (c->cur_window_size <= 8192)
966 len_3_too_far = 2048;
968 len_3_too_far = 4096;
971 /* Get longest match at the current position. */
972 num_matches = lz_mf_get_matches(mf, matches);
974 if (num_matches == 0 ||
975 (matches[num_matches - 1].len == 3 &&
976 matches[num_matches - 1].offset >= len_3_too_far))
978 /* No match, or length 3 match with large offset.
979 * Choose a literal. */
980 xpress_declare_literal(c, *window_ptr, &next_chosen_item);
983 /* Match found. Choose it. */
984 unsigned len = matches[num_matches - 1].len;
985 unsigned offset = matches[num_matches - 1].offset;
987 xpress_declare_match(c, len, offset, &next_chosen_item);
988 lz_mf_skip_positions(mf, len - 1);
991 } while (window_ptr != window_end);
993 return next_chosen_item - c->chosen_items;
996 /* Literals-only parsing */
998 xpress_choose_literals(struct xpress_compressor *c)
1000 const u8 *window_ptr = c->cur_window;
1001 const u8 *window_end = &c->cur_window[c->cur_window_size];
1002 struct xpress_item *next_chosen_item = c->chosen_items;
1005 xpress_declare_literal(c, *window_ptr++, &next_chosen_item);
1006 } while (window_ptr != window_end);
1008 return next_chosen_item - c->chosen_items;
1012 * 'choose_items_func' is provided a data buffer c->cur_window of length
1013 * c->cur_window_size bytes. This data buffer will have already been loaded
1014 * into the match-finder c->mf. 'choose_items_func' must choose the
1015 * match/literal sequence to output to represent this data buffer. The
1016 * intermediate representation of this match/literal sequence must be recorded
1017 * in c->chosen_items, and the Huffman symbols used must be tallied in c->freqs.
1018 * The return value must be the number of items written to c->chosen_items.
1021 xpress_choose_items(struct xpress_compressor *c)
1023 return (*c->params.choose_items_func)(c);
1026 /* Set internal compression parameters for the specified compression level and
1027 * maximum window size. */
1029 xpress_build_params(unsigned int compression_level, u32 max_window_size,
1030 struct xpress_compressor_params *xpress_params)
1032 memset(xpress_params, 0, sizeof(*xpress_params));
1033 xpress_params->num_optim_passes = 1;
1035 if (compression_level == 1) {
1037 /* Literal-only parsing */
1038 xpress_params->choose_items_func = xpress_choose_literals;
1039 xpress_params->mf_algo = LZ_MF_NULL;
1041 } else if (compression_level < 30) {
1043 /* Greedy parsing */
1044 xpress_params->choose_items_func = xpress_choose_greedy_items;
1045 xpress_params->mf_algo = LZ_MF_HASH_CHAINS;
1046 xpress_params->nice_match_length = compression_level;
1047 xpress_params->max_search_depth = compression_level / 2;
1049 } else if (compression_level < 60) {
1052 xpress_params->choose_items_func = xpress_choose_lazy_items;
1053 xpress_params->mf_algo = LZ_MF_HASH_CHAINS;
1054 xpress_params->nice_match_length = compression_level;
1055 xpress_params->max_search_depth = compression_level / 2;
1059 /* Near-optimal parsing */
1060 xpress_params->choose_items_func = xpress_choose_near_optimal_items;
1061 if (max_window_size >= 16384)
1062 xpress_params->mf_algo = LZ_MF_BINARY_TREES;
1064 xpress_params->mf_algo = LZ_MF_HASH_CHAINS;
1065 xpress_params->num_optim_passes = compression_level / 40;
1066 xpress_params->nice_match_length = min(compression_level / 2,
1067 XPRESS_MAX_MATCH_LEN);
1068 xpress_params->max_search_depth = min(compression_level,
1069 XPRESS_MAX_MATCH_LEN);
1073 /* Given the internal compression parameters and maximum window size, build the
1074 * Lempel-Ziv match-finder parameters. */
1076 xpress_build_mf_params(const struct xpress_compressor_params *xpress_params,
1077 u32 max_window_size, struct lz_mf_params *mf_params)
1079 memset(mf_params, 0, sizeof(*mf_params));
1081 mf_params->algorithm = xpress_params->mf_algo;
1082 mf_params->max_window_size = max_window_size;
1083 mf_params->min_match_len = XPRESS_MIN_MATCH_LEN;
1084 mf_params->max_match_len = XPRESS_MAX_MATCH_LEN;
1085 mf_params->max_search_depth = xpress_params->max_search_depth;
1086 mf_params->nice_match_len = xpress_params->nice_match_length;
1090 xpress_free_compressor(void *_c);
1093 xpress_get_needed_memory(size_t max_window_size, unsigned int compression_level)
1096 struct xpress_compressor_params params;
1098 if (max_window_size > XPRESS_MAX_OFFSET + 1)
1101 xpress_build_params(compression_level, max_window_size, ¶ms);
1103 size += sizeof(struct xpress_compressor);
1106 size += lz_mf_get_needed_memory(params.mf_algo, max_window_size);
1109 if (params.choose_items_func == xpress_choose_near_optimal_items) {
1110 size += (XPRESS_OPTIM_ARRAY_LENGTH + params.nice_match_length) *
1111 sizeof(struct xpress_mc_pos_data);
1114 /* cached_matches */
1115 if (params.num_optim_passes > 1) {
1116 size_t cache_len = max(max_window_size * XPRESS_CACHE_PER_POS,
1117 params.max_search_depth + 1);
1118 size += cache_len * sizeof(struct lz_match);
1120 size += params.max_search_depth * sizeof(struct lz_match);
1124 size += max_window_size * sizeof(struct xpress_item);
1130 xpress_create_compressor(size_t max_window_size, unsigned int compression_level,
1133 struct xpress_compressor *c;
1134 struct xpress_compressor_params params;
1135 struct lz_mf_params mf_params;
1137 if (max_window_size > XPRESS_MAX_OFFSET + 1)
1138 return WIMLIB_ERR_INVALID_PARAM;
1140 xpress_build_params(compression_level, max_window_size, ¶ms);
1141 xpress_build_mf_params(¶ms, max_window_size, &mf_params);
1143 c = CALLOC(1, sizeof(struct xpress_compressor));
1149 c->mf = lz_mf_alloc(&mf_params);
1153 if (params.choose_items_func == xpress_choose_near_optimal_items) {
1154 c->optimum = MALLOC((XPRESS_OPTIM_ARRAY_LENGTH +
1155 params.nice_match_length) *
1156 sizeof(struct xpress_mc_pos_data));
1161 if (params.num_optim_passes > 1) {
1162 size_t cache_len = max(max_window_size * XPRESS_CACHE_PER_POS,
1163 params.max_search_depth + 1);
1164 c->cached_matches = MALLOC(cache_len * sizeof(struct lz_match));
1165 if (!c->cached_matches)
1167 c->cache_limit = c->cached_matches + cache_len -
1168 (params.max_search_depth + 1);
1170 c->cached_matches = MALLOC(params.max_search_depth *
1171 sizeof(struct lz_match));
1172 if (!c->cached_matches)
1176 c->chosen_items = MALLOC(max_window_size * sizeof(struct xpress_item));
1177 if (!c->chosen_items)
1184 xpress_free_compressor(c);
1185 return WIMLIB_ERR_NOMEM;
1189 xpress_compress(const void *uncompressed_data, size_t uncompressed_size,
1190 void *compressed_data, size_t compressed_size_avail, void *_c)
1192 struct xpress_compressor *c = _c;
1193 u32 num_chosen_items;
1195 struct xpress_output_bitstream os;
1196 u32 compressed_size;
1198 /* XPRESS requires 256 bytes of overhead for the Huffman code, so it's
1199 * impossible to compress 256 bytes or less of data to less than the
1201 if (compressed_size_avail < XPRESS_NUM_SYMBOLS / 2 + 50)
1204 /* Determine match/literal sequence. */
1205 c->cur_window = uncompressed_data;
1206 c->cur_window_size = uncompressed_size;
1207 lz_mf_load_window(c->mf, c->cur_window, c->cur_window_size);
1208 memset(c->freqs, 0, sizeof(c->freqs));
1210 num_chosen_items = xpress_choose_items(c);
1212 c->freqs[XPRESS_END_OF_DATA]++;
1213 xpress_make_huffman_code(c);
1215 /* Output the Huffman code as a series of 512 4-bit lengths. */
1216 cptr = compressed_data;
1217 for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS; i += 2)
1218 *cptr++ = (c->lens[i + 1] << 4) | c->lens[i];
1220 /* Output the encoded matches/literals. */
1221 xpress_init_output(&os, cptr,
1222 compressed_size_avail - XPRESS_NUM_SYMBOLS / 2);
1223 xpress_write_items(&os, c->chosen_items, num_chosen_items,
1224 c->codewords, c->lens);
1226 /* Flush any pending data and get the length of the compressed data. */
1227 compressed_size = xpress_flush_output(&os);
1228 if (compressed_size == 0)
1231 /* Return the length of the compressed data. */
1232 return compressed_size + XPRESS_NUM_SYMBOLS / 2;
1236 xpress_free_compressor(void *_c)
1238 struct xpress_compressor *c = _c;
1243 FREE(c->cached_matches);
1244 FREE(c->chosen_items);
1249 const struct compressor_ops xpress_compressor_ops = {
1250 .get_needed_memory = xpress_get_needed_memory,
1251 .create_compressor = xpress_create_compressor,
1252 .compress = xpress_compress,
1253 .free_compressor = xpress_free_compressor,