4 * A compressor that produces output compatible with the XPRESS (Huffman
5 * version) compression format.
9 * Copyright (C) 2012, 2013, 2014 Eric Biggers
11 * This file is free software; you can redistribute it and/or modify it under
12 * the terms of the GNU Lesser General Public License as published by the Free
13 * Software Foundation; either version 3 of the License, or (at your option) any
16 * This file is distributed in the hope that it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
21 * You should have received a copy of the GNU Lesser General Public License
22 * along with this file; if not, see http://www.gnu.org/licenses/.
29 #include "wimlib/bitops.h"
30 #include "wimlib/compress_common.h"
31 #include "wimlib/compressor_ops.h"
32 #include "wimlib/endianness.h"
33 #include "wimlib/error.h"
34 #include "wimlib/lz_mf.h"
35 #include "wimlib/unaligned.h"
36 #include "wimlib/util.h"
37 #include "wimlib/xpress.h"
42 #define XPRESS_CACHE_PER_POS 8
43 #define XPRESS_OPTIM_ARRAY_LENGTH 4096
45 struct xpress_compressor;
47 struct xpress_mc_pos_data;
49 /* Internal compression parameters */
50 struct xpress_compressor_params {
52 /* See xpress_choose_items() */
53 u32 (*choose_items_func)(struct xpress_compressor *);
55 /* For near-optimal parsing only */
58 /* Match-finding algorithm and parameters */
59 enum lz_mf_algo mf_algo;
61 u32 nice_match_length;
64 /* State of the XPRESS compressor */
65 struct xpress_compressor {
67 /* Internal compression parameters */
68 struct xpress_compressor_params params;
70 /* Data currently being compressed */
74 /* Lempel-Ziv match-finder */
77 /* Optimal parsing data */
78 unsigned (*get_matches_func)(struct xpress_compressor *,
79 const struct lz_match **);
80 void (*skip_bytes_func)(struct xpress_compressor *, unsigned n);
81 struct lz_match *cached_matches;
82 struct lz_match *cache_ptr;
83 struct lz_match *cache_limit;
84 struct xpress_mc_pos_data *optimum;
85 u8 costs[XPRESS_NUM_SYMBOLS];
87 /* The selected sequence of matches/literals */
88 struct xpress_item *chosen_items;
90 /* Symbol frequency counters */
91 u32 freqs[XPRESS_NUM_SYMBOLS];
93 /* The current Huffman code */
94 u32 codewords[XPRESS_NUM_SYMBOLS];
95 u8 lens[XPRESS_NUM_SYMBOLS];
98 /* Intermediate XPRESS match/literal format */
101 /* Bits 0 - 8: Symbol
102 * Bits 9 - 24: Length - XPRESS_MIN_MATCH_LEN
103 * Bits 25 - 28: Number of extra offset bits
104 * Bits 29+ : Extra offset bits */
110 * Match chooser position data:
112 * An array of these structures is used during the near-optimal match-choosing
113 * algorithm. They correspond to consecutive positions in the window and are
114 * used to keep track of the cost to reach each position, and the match/literal
115 * choices that need to be chosen to reach that position.
117 struct xpress_mc_pos_data {
119 /* The cost, in bits, of the lowest-cost path that has been found to
120 * reach this position. This can change as progressively lower cost
121 * paths are found to reach this position. */
123 #define MC_INFINITE_COST UINT32_MAX
125 /* The match or literal that was taken to reach this position. This can
126 * change as progressively lower cost paths are found to reach this
129 * This variable is divided into two bitfields.
132 * Low bits are 1, high bits are the literal.
135 * Low bits are the match length, high bits are the offset.
138 #define MC_OFFSET_SHIFT 16
139 #define MC_LEN_MASK (((u32)1 << MC_OFFSET_SHIFT) - 1)
144 * Structure to keep track of the current state of sending data to the
145 * compressed output buffer.
147 * The XPRESS bitstream is encoded as a sequence of little endian 16-bit coding
148 * units interwoven with literal bytes.
150 struct xpress_output_bitstream {
152 /* Bits that haven't yet been written to the output buffer. */
155 /* Number of bits currently held in @bitbuf. */
158 /* Pointer to the start of the output buffer. */
161 /* Pointer to the location in the ouput buffer at which to write the
165 /* Pointer to the location in the output buffer at which to write the
166 * next 16 bits, after @next_bits. */
169 /* Pointer to the location in the output buffer at which to write the
170 * next literal byte. */
173 /* Pointer to the end of the output buffer. */
178 * Initialize the output bitstream.
181 * The output bitstream structure to initialize.
183 * The buffer to write to.
185 * Size of @buffer, in bytes. Must be at least 4.
188 xpress_init_output(struct xpress_output_bitstream *os, void *buffer, u32 size)
193 os->next_bits = os->start;
194 os->next_bits2 = os->start + 2;
195 os->next_byte = os->start + 4;
196 os->end = os->start + size;
200 * Write some bits to the output bitstream.
202 * The bits are given by the low-order @num_bits bits of @bits. Higher-order
203 * bits in @bits cannot be set. At most 16 bits can be written at once.
205 * If the output buffer space is exhausted, then the bits will be ignored, and
206 * xpress_flush_output() will return 0 when it gets called.
209 xpress_write_bits(struct xpress_output_bitstream *os,
210 const u32 bits, const unsigned int num_bits)
212 /* This code is optimized for XPRESS, which never needs to write more
213 * than 16 bits at once. */
215 os->bitcount += num_bits;
216 os->bitbuf = (os->bitbuf << num_bits) | bits;
218 if (os->bitcount > 16) {
220 if (os->end - os->next_byte >= 2) {
221 put_unaligned_u16_le(os->bitbuf >> os->bitcount, os->next_bits);
222 os->next_bits = os->next_bits2;
223 os->next_bits2 = os->next_byte;
230 * Interweave a literal byte into the output bitstream.
233 xpress_write_byte(struct xpress_output_bitstream *os, u8 byte)
235 if (os->next_byte < os->end)
236 *os->next_byte++ = byte;
240 * Flush the last coding unit to the output buffer if needed. Return the total
241 * number of bytes written to the output buffer, or 0 if an overflow occurred.
244 xpress_flush_output(struct xpress_output_bitstream *os)
246 if (unlikely(os->end - os->next_byte < 2))
249 put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), os->next_bits);
250 put_unaligned_u16_le(0, os->next_bits2);
252 return os->next_byte - os->start;
255 /* Output a match or literal. */
257 xpress_write_item(struct xpress_item item, struct xpress_output_bitstream *os,
258 const u32 codewords[], const u8 lens[])
260 u64 data = item.data;
262 unsigned adjusted_len;
263 unsigned num_extra_bits;
266 symbol = data & 0x1FF;
268 xpress_write_bits(os, codewords[symbol], lens[symbol]);
270 if (symbol < XPRESS_NUM_CHARS) /* Literal? */
273 adjusted_len = (data >> 9) & 0xFFFF;
275 /* If length >= 18, one extra length byte.
276 * If length >= 273, three (total) extra length bytes. */
277 if (adjusted_len >= 0xf) {
278 u8 byte1 = min(adjusted_len - 0xf, 0xff);
279 xpress_write_byte(os, byte1);
281 xpress_write_byte(os, adjusted_len & 0xff);
282 xpress_write_byte(os, adjusted_len >> 8);
286 num_extra_bits = (data >> 25) & 0xF;
287 extra_bits = data >> 29;
289 xpress_write_bits(os, extra_bits, num_extra_bits);
292 /* Output a sequence of XPRESS matches and literals. */
294 xpress_write_items(struct xpress_output_bitstream *os,
295 const struct xpress_item items[], u32 num_items,
296 const u32 codewords[], const u8 lens[])
298 for (u32 i = 0; i < num_items; i++)
299 xpress_write_item(items[i], os, codewords, lens);
301 /* End-of-data symbol (required for MS compatibility) */
302 xpress_write_bits(os, codewords[XPRESS_END_OF_DATA], lens[XPRESS_END_OF_DATA]);
305 /* Make the Huffman code for XPRESS.
307 * Takes as input c->freqs and produces as output c->lens and c->codewords. */
309 xpress_make_huffman_code(struct xpress_compressor *c)
311 make_canonical_huffman_code(XPRESS_NUM_SYMBOLS, XPRESS_MAX_CODEWORD_LEN,
312 c->freqs, c->lens, c->codewords);
315 /* Tally, and optionally record, the specified literal byte. */
317 xpress_declare_literal(struct xpress_compressor *c, unsigned literal,
318 struct xpress_item **next_chosen_item)
322 if (next_chosen_item) {
323 *(*next_chosen_item)++ = (struct xpress_item) {
329 /* Tally, and optionally record, the specified match. */
331 xpress_declare_match(struct xpress_compressor *c,
332 unsigned len, unsigned offset,
333 struct xpress_item **next_chosen_item)
335 unsigned adjusted_len = len - XPRESS_MIN_MATCH_LEN;
336 unsigned len_hdr = min(adjusted_len, 0xf);
337 unsigned offset_high_bit = fls32(offset);
338 unsigned sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
342 if (next_chosen_item) {
343 *(*next_chosen_item)++ = (struct xpress_item) {
345 ((u64)adjusted_len << 9) |
346 ((u64)offset_high_bit << 25) |
347 ((u64)(offset ^ (1U << offset_high_bit)) << 29),
352 /* Tally, and optionally record, the specified match or literal. */
354 xpress_declare_item(struct xpress_compressor *c, u32 mc_item_data,
355 struct xpress_item **next_chosen_item)
357 unsigned len = mc_item_data & MC_LEN_MASK;
358 unsigned offset_data = mc_item_data >> MC_OFFSET_SHIFT;
361 xpress_declare_literal(c, offset_data, next_chosen_item);
363 xpress_declare_match(c, len, offset_data, next_chosen_item);
367 xpress_record_item_list(struct xpress_compressor *c,
368 struct xpress_mc_pos_data *cur_optimum_ptr,
369 struct xpress_item **next_chosen_item)
371 struct xpress_mc_pos_data *end_optimum_ptr;
375 /* The list is currently in reverse order (last item to first item).
377 end_optimum_ptr = cur_optimum_ptr;
378 saved_item = cur_optimum_ptr->mc_item_data;
381 cur_optimum_ptr -= item & MC_LEN_MASK;
382 saved_item = cur_optimum_ptr->mc_item_data;
383 cur_optimum_ptr->mc_item_data = item;
384 } while (cur_optimum_ptr != c->optimum);
386 /* Walk the list of items from beginning to end, tallying and recording
389 xpress_declare_item(c, cur_optimum_ptr->mc_item_data, next_chosen_item);
390 cur_optimum_ptr += (cur_optimum_ptr->mc_item_data) & MC_LEN_MASK;
391 } while (cur_optimum_ptr != end_optimum_ptr);
395 xpress_tally_item_list(struct xpress_compressor *c,
396 struct xpress_mc_pos_data *cur_optimum_ptr)
398 /* Since we're just tallying the items, we don't need to reverse the
399 * list. Processing the items in reverse order is fine. */
401 xpress_declare_item(c, cur_optimum_ptr->mc_item_data, NULL);
402 cur_optimum_ptr -= (cur_optimum_ptr->mc_item_data & MC_LEN_MASK);
403 } while (cur_optimum_ptr != c->optimum);
406 /* Tally, and optionally (if next_chosen_item != NULL) record, in order, all
407 * items in the current list of items found by the match-chooser. */
409 xpress_declare_item_list(struct xpress_compressor *c,
410 struct xpress_mc_pos_data *cur_optimum_ptr,
411 struct xpress_item **next_chosen_item)
413 if (next_chosen_item)
414 xpress_record_item_list(c, cur_optimum_ptr, next_chosen_item);
416 xpress_tally_item_list(c, cur_optimum_ptr);
420 xpress_get_matches_fillcache(struct xpress_compressor *c,
421 const struct lz_match **matches_ret)
423 struct lz_match *cache_ptr;
424 struct lz_match *matches;
425 unsigned num_matches;
427 cache_ptr = c->cache_ptr;
428 matches = cache_ptr + 1;
429 if (likely(cache_ptr <= c->cache_limit)) {
430 num_matches = lz_mf_get_matches(c->mf, matches);
431 cache_ptr->len = num_matches;
432 c->cache_ptr = matches + num_matches;
436 *matches_ret = matches;
441 xpress_get_matches_usecache(struct xpress_compressor *c,
442 const struct lz_match **matches_ret)
444 struct lz_match *cache_ptr;
445 struct lz_match *matches;
446 unsigned num_matches;
448 cache_ptr = c->cache_ptr;
449 matches = cache_ptr + 1;
450 if (cache_ptr <= c->cache_limit) {
451 num_matches = cache_ptr->len;
452 c->cache_ptr = matches + num_matches;
456 *matches_ret = matches;
461 xpress_get_matches_usecache_nocheck(struct xpress_compressor *c,
462 const struct lz_match **matches_ret)
464 struct lz_match *cache_ptr;
465 struct lz_match *matches;
466 unsigned num_matches;
468 cache_ptr = c->cache_ptr;
469 matches = cache_ptr + 1;
470 num_matches = cache_ptr->len;
471 c->cache_ptr = matches + num_matches;
472 *matches_ret = matches;
477 xpress_get_matches_noncaching(struct xpress_compressor *c,
478 const struct lz_match **matches_ret)
480 *matches_ret = c->cached_matches;
481 return lz_mf_get_matches(c->mf, c->cached_matches);
485 * Find matches at the next position in the window.
487 * This uses a wrapper function around the underlying match-finder.
489 * Returns the number of matches found and sets *matches_ret to point to the
490 * matches array. The matches will be sorted by strictly increasing length and
493 static inline unsigned
494 xpress_get_matches(struct xpress_compressor *c,
495 const struct lz_match **matches_ret)
497 return (*c->get_matches_func)(c, matches_ret);
501 xpress_skip_bytes_fillcache(struct xpress_compressor *c, unsigned n)
503 struct lz_match *cache_ptr;
505 cache_ptr = c->cache_ptr;
506 lz_mf_skip_positions(c->mf, n);
507 if (cache_ptr <= c->cache_limit) {
511 } while (--n && likely(cache_ptr <= c->cache_limit));
513 c->cache_ptr = cache_ptr;
517 xpress_skip_bytes_usecache(struct xpress_compressor *c, unsigned n)
519 struct lz_match *cache_ptr;
521 cache_ptr = c->cache_ptr;
522 if (likely(cache_ptr <= c->cache_limit)) {
524 cache_ptr += 1 + cache_ptr->len;
525 } while (--n && likely(cache_ptr <= c->cache_limit));
527 c->cache_ptr = cache_ptr;
531 xpress_skip_bytes_usecache_nocheck(struct xpress_compressor *c, unsigned n)
533 struct lz_match *cache_ptr;
535 cache_ptr = c->cache_ptr;
537 cache_ptr += 1 + cache_ptr->len;
539 c->cache_ptr = cache_ptr;
543 xpress_skip_bytes_noncaching(struct xpress_compressor *c, unsigned n)
545 lz_mf_skip_positions(c->mf, n);
549 * Skip the specified number of positions in the window (don't search for
552 * This uses a wrapper function around the underlying match-finder.
555 xpress_skip_bytes(struct xpress_compressor *c, unsigned n)
557 return (*c->skip_bytes_func)(c, n);
560 /* Set default XPRESS Huffman symbol costs to bootstrap the iterative
561 * optimization algorithm. */
563 xpress_set_default_costs(u8 costs[])
567 /* Literal symbols */
568 for (i = 0; i < XPRESS_NUM_CHARS; i++)
572 for (; i < XPRESS_NUM_SYMBOLS; i++)
576 /* Copy the Huffman codeword lengths array @lens to the Huffman symbol costs
577 * array @costs, but also assign a default cost to each 0-length (unused)
580 xpress_set_costs(u8 costs[], const u8 lens[])
582 for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS; i++)
583 costs[i] = lens[i] ? lens[i] : XPRESS_MAX_CODEWORD_LEN;
587 * Consider coding each match in @matches.
589 * @matches must be sorted by strictly increasing length and strictly
590 * increasing offset. This is guaranteed by the match-finder.
592 * We consider each length from the minimum (3) to the longest
593 * (matches[num_matches - 1].len). For each length, we consider only
594 * the smallest offset for which that length is available. Although
595 * this is not guaranteed to be optimal due to the possibility of a
596 * larger offset costing less than a smaller offset to code, this is a
597 * very useful heuristic.
600 xpress_consider_matches(struct xpress_compressor *c,
601 struct xpress_mc_pos_data *cur_optimum_ptr,
602 const struct lz_match matches[],
603 unsigned num_matches)
606 unsigned len = XPRESS_MIN_MATCH_LEN;
610 unsigned offset_high_bit;
611 unsigned adjusted_len;
615 if (matches[num_matches - 1].len < 0xf + XPRESS_MIN_MATCH_LEN) {
616 /* All lengths are small. Optimize accordingly. */
618 offset = matches[i].offset;
619 offset_high_bit = fls32(offset);
620 len_hdr = len - XPRESS_MIN_MATCH_LEN;
621 sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
623 position_cost = cur_optimum_ptr->cost + offset_high_bit;
625 cost = position_cost + c->costs[sym];
626 if (cost < (cur_optimum_ptr + len)->cost) {
627 (cur_optimum_ptr + len)->cost = cost;
628 (cur_optimum_ptr + len)->mc_item_data =
629 (offset << MC_OFFSET_SHIFT) | len;
632 } while (++len <= matches[i].len);
633 } while (++i != num_matches);
635 /* Some lengths are big. */
637 offset = matches[i].offset;
638 offset_high_bit = fls32(offset);
639 position_cost = cur_optimum_ptr->cost + offset_high_bit;
641 adjusted_len = len - XPRESS_MIN_MATCH_LEN;
642 len_hdr = min(adjusted_len, 0xf);
643 sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
645 cost = position_cost + c->costs[sym];
646 if (adjusted_len >= 0xf) {
648 if (adjusted_len - 0xf >= 0xff)
652 if (cost < (cur_optimum_ptr + len)->cost) {
653 (cur_optimum_ptr + len)->cost = cost;
654 (cur_optimum_ptr + len)->mc_item_data =
655 (offset << MC_OFFSET_SHIFT) | len;
657 } while (++len <= matches[i].len);
658 } while (++i != num_matches);
663 * The main near-optimal parsing routine.
665 * Briefly, the algorithm does an approximate minimum-cost path search to find a
666 * "near-optimal" sequence of matches and literals to output, based on the
667 * current cost model. The algorithm steps forward, position by position (byte
668 * by byte), and updates the minimum cost path to reach each later position that
669 * can be reached using a match or literal from the current position. This is
670 * essentially Dijkstra's algorithm in disguise: the graph nodes are positions,
671 * the graph edges are possible matches/literals to code, and the cost of each
672 * edge is the estimated number of bits that will be required to output the
673 * corresponding match or literal. But one difference is that we actually
674 * compute the lowest-cost path in pieces, where each piece is terminated when
675 * there are no choices to be made.
677 * If next_chosen_item != NULL, then all items chosen will be recorded (saved in
678 * the chosen_items array). Otherwise, all items chosen will only be tallied
679 * (symbol frequencies tallied in c->freqs).
682 xpress_optim_pass(struct xpress_compressor *c,
683 struct xpress_item **next_chosen_item)
685 const u8 *window_end;
686 const u8 *window_ptr;
687 struct xpress_mc_pos_data *cur_optimum_ptr;
688 struct xpress_mc_pos_data *end_optimum_ptr;
689 const struct lz_match *matches;
690 unsigned num_matches;
691 unsigned longest_len;
695 window_ptr = c->cur_window;
696 window_end = &c->cur_window[c->cur_window_size];
699 /* Start building a new list of items, which will correspond to the next
700 * piece of the overall minimum-cost path. */
702 if (window_ptr == window_end)
705 cur_optimum_ptr = c->optimum;
706 cur_optimum_ptr->cost = 0;
707 end_optimum_ptr = cur_optimum_ptr;
709 /* The following loop runs once for each per byte in the window, except
710 * in a couple shortcut cases. */
713 /* Find matches with the current position. */
714 num_matches = xpress_get_matches(c, &matches);
718 longest_len = matches[num_matches - 1].len;
720 /* If there's a very long match, choose it immediately.
722 if (longest_len >= c->params.nice_match_length) {
724 xpress_skip_bytes(c, longest_len - 1);
725 window_ptr += longest_len;
727 if (cur_optimum_ptr != c->optimum)
728 xpress_declare_item_list(c, cur_optimum_ptr,
731 xpress_declare_match(c, longest_len,
732 matches[num_matches - 1].offset,
737 /* If reaching any positions for the first time,
738 * initialize their costs to "infinity". */
739 while (end_optimum_ptr < cur_optimum_ptr + longest_len)
740 (++end_optimum_ptr)->cost = MC_INFINITE_COST;
742 /* Consider coding a match. */
743 xpress_consider_matches(c, cur_optimum_ptr,
744 matches, num_matches);
746 /* No matches found. The only choice at this position
747 * is to code a literal. */
749 if (end_optimum_ptr == cur_optimum_ptr) {
751 /* Optimization for single literals. */
752 if (likely(cur_optimum_ptr == c->optimum)) {
753 xpress_declare_literal(c, *window_ptr++,
755 if (window_ptr == window_end)
760 (++end_optimum_ptr)->cost = MC_INFINITE_COST;
764 /* Consider coding a literal. */
765 literal = *window_ptr++;
766 cost = cur_optimum_ptr->cost + c->costs[literal];
767 if (cost < (cur_optimum_ptr + 1)->cost) {
768 (cur_optimum_ptr + 1)->cost = cost;
769 (cur_optimum_ptr + 1)->mc_item_data =
770 ((u32)literal << MC_OFFSET_SHIFT) | 1;
773 /* Advance to the next position. */
777 * This loop will terminate when either of the following
778 * conditions is true:
780 * (1) cur_optimum_ptr == end_optimum_ptr
782 * There are no paths that extend beyond the current
783 * position. In this case, any path to a later position
784 * must pass through the current position, so we can go
785 * ahead and choose the list of items that led to this
788 * (2) cur_optimum_ptr == &c->optimum[XPRESS_OPTIM_ARRAY_LENGTH]
790 * This bounds the number of times the algorithm can step
791 * forward before it is guaranteed to start choosing items.
792 * This limits the memory usage. But
793 * XPRESS_OPTIM_ARRAY_LENGTH is high enough that on most
794 * inputs this limit is never reached.
796 * Note: no check for end-of-window is needed because
797 * end-of-window will trigger condition (1).
799 if (cur_optimum_ptr == end_optimum_ptr ||
800 cur_optimum_ptr == &c->optimum[XPRESS_OPTIM_ARRAY_LENGTH])
804 /* Choose the current list of items that constitute the minimum-cost
805 * path to the current position. */
806 xpress_declare_item_list(c, cur_optimum_ptr, next_chosen_item);
810 /* Near-optimal parsing */
812 xpress_choose_near_optimal_items(struct xpress_compressor *c)
814 u32 num_passes_remaining = c->params.num_optim_passes;
815 struct xpress_item *next_chosen_item;
816 struct xpress_item **next_chosen_item_ptr;
818 /* Choose appropriate match-finder wrapper functions. */
819 if (c->params.num_optim_passes > 1) {
820 c->get_matches_func = xpress_get_matches_fillcache;
821 c->skip_bytes_func = xpress_skip_bytes_fillcache;
823 c->get_matches_func = xpress_get_matches_noncaching;
824 c->skip_bytes_func = xpress_skip_bytes_noncaching;
827 /* The first optimization pass will use a default cost model. Each
828 * additional optimization pass will use a cost model computed from the
831 * To improve performance, we only generate the array containing the
832 * matches and literals in intermediate form on the final pass. For
833 * earlier passes, tallying symbol frequencies is sufficient. */
834 xpress_set_default_costs(c->costs);
836 next_chosen_item_ptr = NULL;
838 /* Reset the match-finder wrapper. */
839 c->cache_ptr = c->cached_matches;
841 if (num_passes_remaining == 1) {
842 /* Last pass: actually generate the items. */
843 next_chosen_item = c->chosen_items;
844 next_chosen_item_ptr = &next_chosen_item;
847 /* Choose the items. */
848 xpress_optim_pass(c, next_chosen_item_ptr);
850 if (num_passes_remaining > 1) {
851 /* This isn't the last pass. */
853 /* Make the Huffman code from the symbol frequencies. */
854 c->freqs[XPRESS_END_OF_DATA]++;
855 xpress_make_huffman_code(c);
857 /* Reset symbol frequencies. */
858 memset(c->freqs, 0, sizeof(c->freqs));
860 /* Update symbol costs. */
861 xpress_set_costs(c->costs, c->lens);
863 /* Choose appopriate match-finder wrapper functions. */
864 if (c->cache_ptr <= c->cache_limit) {
865 c->get_matches_func = xpress_get_matches_usecache_nocheck;
866 c->skip_bytes_func = xpress_skip_bytes_usecache_nocheck;
868 c->get_matches_func = xpress_get_matches_usecache;
869 c->skip_bytes_func = xpress_skip_bytes_usecache;
872 } while (--num_passes_remaining);
874 /* Return the number of items chosen. */
875 return next_chosen_item - c->chosen_items;
880 xpress_choose_lazy_items(struct xpress_compressor *c)
882 const u8 *window_ptr = c->cur_window;
883 const u8 *window_end = &c->cur_window[c->cur_window_size];
884 struct xpress_item *next_chosen_item = c->chosen_items;
886 struct lz_mf *mf = c->mf;
887 struct lz_match *matches = c->cached_matches;
888 unsigned num_matches;
889 struct lz_match prev_match;
891 if (c->cur_window_size <= 8192)
892 len_3_too_far = 2048;
894 len_3_too_far = 4096;
897 /* Don't have match at previous position */
899 num_matches = lz_mf_get_matches(mf, matches);
902 if (num_matches == 0 ||
903 (matches[num_matches - 1].len == 3 &&
904 matches[num_matches - 1].offset >= len_3_too_far))
906 /* No matches found => output literal */
907 xpress_declare_literal(c, *(window_ptr - 1),
912 prev_match = matches[num_matches - 1];
915 /* Have match at previous position */
917 if (prev_match.len >= c->params.nice_match_length) {
918 /* Very long match found => output immediately */
919 xpress_declare_match(c, prev_match.len,
922 lz_mf_skip_positions(mf, prev_match.len - 1);
923 window_ptr += prev_match.len - 1;
927 num_matches = lz_mf_get_matches(mf, matches);
930 if (num_matches == 0 ||
931 (matches[num_matches - 1].len <= prev_match.len))
933 /* Next match is not longer => output previous match */
934 xpress_declare_match(c, prev_match.len,
937 lz_mf_skip_positions(mf, prev_match.len - 2);
938 window_ptr += prev_match.len - 2;
942 /* Next match is longer => output literal */
944 xpress_declare_literal(c, *(window_ptr - 2), &next_chosen_item);
946 prev_match = matches[num_matches - 1];
948 goto have_prev_match;
950 } while (window_ptr != window_end);
952 return next_chosen_item - c->chosen_items;
957 xpress_choose_greedy_items(struct xpress_compressor *c)
959 const u8 *window_ptr = c->cur_window;
960 const u8 *window_end = &c->cur_window[c->cur_window_size];
961 struct xpress_item *next_chosen_item = c->chosen_items;
963 struct lz_mf *mf = c->mf;
964 struct lz_match *matches = c->cached_matches;
965 unsigned num_matches;
967 if (c->cur_window_size <= 8192)
968 len_3_too_far = 2048;
970 len_3_too_far = 4096;
973 /* Get longest match at the current position. */
974 num_matches = lz_mf_get_matches(mf, matches);
976 if (num_matches == 0 ||
977 (matches[num_matches - 1].len == 3 &&
978 matches[num_matches - 1].offset >= len_3_too_far))
980 /* No match, or length 3 match with large offset.
981 * Choose a literal. */
982 xpress_declare_literal(c, *window_ptr, &next_chosen_item);
985 /* Match found. Choose it. */
986 unsigned len = matches[num_matches - 1].len;
987 unsigned offset = matches[num_matches - 1].offset;
989 xpress_declare_match(c, len, offset, &next_chosen_item);
990 lz_mf_skip_positions(mf, len - 1);
993 } while (window_ptr != window_end);
995 return next_chosen_item - c->chosen_items;
998 /* Literals-only parsing */
1000 xpress_choose_literals(struct xpress_compressor *c)
1002 const u8 *window_ptr = c->cur_window;
1003 const u8 *window_end = &c->cur_window[c->cur_window_size];
1004 struct xpress_item *next_chosen_item = c->chosen_items;
1007 xpress_declare_literal(c, *window_ptr++, &next_chosen_item);
1008 } while (window_ptr != window_end);
1010 return next_chosen_item - c->chosen_items;
1014 * 'choose_items_func' is provided a data buffer c->cur_window of length
1015 * c->cur_window_size bytes. This data buffer will have already been loaded
1016 * into the match-finder c->mf. 'choose_items_func' must choose the
1017 * match/literal sequence to output to represent this data buffer. The
1018 * intermediate representation of this match/literal sequence must be recorded
1019 * in c->chosen_items, and the Huffman symbols used must be tallied in c->freqs.
1020 * The return value must be the number of items written to c->chosen_items.
1023 xpress_choose_items(struct xpress_compressor *c)
1025 return (*c->params.choose_items_func)(c);
1028 /* Set internal compression parameters for the specified compression level and
1029 * maximum window size. */
1031 xpress_build_params(unsigned int compression_level, u32 max_window_size,
1032 struct xpress_compressor_params *xpress_params)
1034 memset(xpress_params, 0, sizeof(*xpress_params));
1035 xpress_params->num_optim_passes = 1;
1037 if (compression_level == 1) {
1039 /* Literal-only parsing */
1040 xpress_params->choose_items_func = xpress_choose_literals;
1041 xpress_params->mf_algo = LZ_MF_NULL;
1043 } else if (compression_level < 30) {
1045 /* Greedy parsing */
1046 xpress_params->choose_items_func = xpress_choose_greedy_items;
1047 xpress_params->mf_algo = LZ_MF_HASH_CHAINS;
1048 xpress_params->nice_match_length = compression_level;
1049 xpress_params->max_search_depth = compression_level / 2;
1051 } else if (compression_level < 60) {
1054 xpress_params->choose_items_func = xpress_choose_lazy_items;
1055 xpress_params->mf_algo = LZ_MF_HASH_CHAINS;
1056 xpress_params->nice_match_length = compression_level;
1057 xpress_params->max_search_depth = compression_level / 2;
1061 /* Near-optimal parsing */
1062 xpress_params->choose_items_func = xpress_choose_near_optimal_items;
1063 if (max_window_size >= 16384)
1064 xpress_params->mf_algo = LZ_MF_BINARY_TREES;
1066 xpress_params->mf_algo = LZ_MF_HASH_CHAINS;
1067 xpress_params->num_optim_passes = compression_level / 40;
1068 xpress_params->nice_match_length = min(compression_level / 2,
1069 XPRESS_MAX_MATCH_LEN);
1070 xpress_params->max_search_depth = min(compression_level,
1071 XPRESS_MAX_MATCH_LEN);
1075 /* Given the internal compression parameters and maximum window size, build the
1076 * Lempel-Ziv match-finder parameters. */
1078 xpress_build_mf_params(const struct xpress_compressor_params *xpress_params,
1079 u32 max_window_size, struct lz_mf_params *mf_params)
1081 memset(mf_params, 0, sizeof(*mf_params));
1083 mf_params->algorithm = xpress_params->mf_algo;
1084 mf_params->max_window_size = max_window_size;
1085 mf_params->min_match_len = XPRESS_MIN_MATCH_LEN;
1086 mf_params->max_match_len = XPRESS_MAX_MATCH_LEN;
1087 mf_params->max_search_depth = xpress_params->max_search_depth;
1088 mf_params->nice_match_len = xpress_params->nice_match_length;
1092 xpress_free_compressor(void *_c);
1095 xpress_get_needed_memory(size_t max_window_size, unsigned int compression_level)
1098 struct xpress_compressor_params params;
1100 if (max_window_size > XPRESS_MAX_OFFSET + 1)
1103 xpress_build_params(compression_level, max_window_size, ¶ms);
1105 size += sizeof(struct xpress_compressor);
1108 size += lz_mf_get_needed_memory(params.mf_algo, max_window_size);
1111 if (params.choose_items_func == xpress_choose_near_optimal_items) {
1112 size += (XPRESS_OPTIM_ARRAY_LENGTH + params.nice_match_length) *
1113 sizeof(struct xpress_mc_pos_data);
1116 /* cached_matches */
1117 if (params.num_optim_passes > 1) {
1118 size_t cache_len = max(max_window_size * XPRESS_CACHE_PER_POS,
1119 params.max_search_depth + 1);
1120 size += cache_len * sizeof(struct lz_match);
1122 size += params.max_search_depth * sizeof(struct lz_match);
1126 size += max_window_size * sizeof(struct xpress_item);
1132 xpress_create_compressor(size_t max_window_size, unsigned int compression_level,
1135 struct xpress_compressor *c;
1136 struct xpress_compressor_params params;
1137 struct lz_mf_params mf_params;
1139 if (max_window_size > XPRESS_MAX_OFFSET + 1)
1140 return WIMLIB_ERR_INVALID_PARAM;
1142 xpress_build_params(compression_level, max_window_size, ¶ms);
1143 xpress_build_mf_params(¶ms, max_window_size, &mf_params);
1145 c = CALLOC(1, sizeof(struct xpress_compressor));
1151 c->mf = lz_mf_alloc(&mf_params);
1155 if (params.choose_items_func == xpress_choose_near_optimal_items) {
1156 c->optimum = MALLOC((XPRESS_OPTIM_ARRAY_LENGTH +
1157 params.nice_match_length) *
1158 sizeof(struct xpress_mc_pos_data));
1163 if (params.num_optim_passes > 1) {
1164 size_t cache_len = max(max_window_size * XPRESS_CACHE_PER_POS,
1165 params.max_search_depth + 1);
1166 c->cached_matches = MALLOC(cache_len * sizeof(struct lz_match));
1167 if (!c->cached_matches)
1169 c->cache_limit = c->cached_matches + cache_len -
1170 (params.max_search_depth + 1);
1172 c->cached_matches = MALLOC(params.max_search_depth *
1173 sizeof(struct lz_match));
1174 if (!c->cached_matches)
1178 c->chosen_items = MALLOC(max_window_size * sizeof(struct xpress_item));
1179 if (!c->chosen_items)
1186 xpress_free_compressor(c);
1187 return WIMLIB_ERR_NOMEM;
1191 xpress_compress(const void *uncompressed_data, size_t uncompressed_size,
1192 void *compressed_data, size_t compressed_size_avail, void *_c)
1194 struct xpress_compressor *c = _c;
1195 u32 num_chosen_items;
1197 struct xpress_output_bitstream os;
1198 u32 compressed_size;
1200 /* XPRESS requires 256 bytes of overhead for the Huffman code, so it's
1201 * impossible to compress 256 bytes or less of data to less than the
1203 if (compressed_size_avail < XPRESS_NUM_SYMBOLS / 2 + 50)
1206 /* Determine match/literal sequence. */
1207 c->cur_window = uncompressed_data;
1208 c->cur_window_size = uncompressed_size;
1209 lz_mf_load_window(c->mf, c->cur_window, c->cur_window_size);
1210 memset(c->freqs, 0, sizeof(c->freqs));
1212 num_chosen_items = xpress_choose_items(c);
1214 c->freqs[XPRESS_END_OF_DATA]++;
1215 xpress_make_huffman_code(c);
1217 /* Output the Huffman code as a series of 512 4-bit lengths. */
1218 cptr = compressed_data;
1219 for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS; i += 2)
1220 *cptr++ = (c->lens[i + 1] << 4) | c->lens[i];
1222 /* Output the encoded matches/literals. */
1223 xpress_init_output(&os, cptr,
1224 compressed_size_avail - XPRESS_NUM_SYMBOLS / 2);
1225 xpress_write_items(&os, c->chosen_items, num_chosen_items,
1226 c->codewords, c->lens);
1228 /* Flush any pending data and get the length of the compressed data. */
1229 compressed_size = xpress_flush_output(&os);
1230 if (compressed_size == 0)
1233 /* Return the length of the compressed data. */
1234 return compressed_size + XPRESS_NUM_SYMBOLS / 2;
1238 xpress_free_compressor(void *_c)
1240 struct xpress_compressor *c = _c;
1245 FREE(c->cached_matches);
1246 FREE(c->chosen_items);
1251 const struct compressor_ops xpress_compressor_ops = {
1252 .get_needed_memory = xpress_get_needed_memory,
1253 .create_compressor = xpress_create_compressor,
1254 .compress = xpress_compress,
1255 .free_compressor = xpress_free_compressor,