4 * A compressor for the LZMS compression format.
8 * Copyright (C) 2013, 2014, 2015 Eric Biggers
10 * This file is free software; you can redistribute it and/or modify it under
11 * the terms of the GNU Lesser General Public License as published by the Free
12 * Software Foundation; either version 3 of the License, or (at your option) any
15 * This file is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this file; if not, see http://www.gnu.org/licenses/.
31 #include "wimlib/compress_common.h"
32 #include "wimlib/compressor_ops.h"
33 #include "wimlib/error.h"
34 #include "wimlib/lcpit_matchfinder.h"
35 #include "wimlib/lz_extend.h"
36 #include "wimlib/lz_hash.h"
37 #include "wimlib/lzms_common.h"
38 #include "wimlib/unaligned.h"
39 #include "wimlib/util.h"
42 * MAX_FAST_LENGTH is the maximum match length for which the length slot can be
43 * looked up directly in 'fast_length_slot_tab' and the length cost can be
44 * looked up directly in 'fast_length_cost_tab'.
46 * We also limit the 'nice_match_len' parameter to this value. Consequently, if
47 * the longest match found is shorter than 'nice_match_len', then it must also
48 * be shorter than MAX_FAST_LENGTH. This makes it possible to do fast lookups
49 * of length costs using 'fast_length_cost_tab' without having to keep checking
50 * whether the length exceeds MAX_FAST_LENGTH or not.
52 #define MAX_FAST_LENGTH 255
54 /* NUM_OPTIM_NODES is the maximum number of bytes the parsing algorithm will
55 * step forward before forcing the pending items to be encoded. If this value
56 * is increased, then there will be fewer forced flushes, but the probability
57 * entries and Huffman codes will be more likely to become outdated. */
58 #define NUM_OPTIM_NODES 2048
60 /* COST_SHIFT is a scaling factor that makes it possible to consider fractional
61 * bit costs. A single bit has a cost of (1 << COST_SHIFT). */
62 #define COST_SHIFT 6
64 /* Length of the hash table for finding delta matches */
65 #define DELTA_HASH_ORDER 17
66 #define DELTA_HASH_LENGTH ((u32)1 << DELTA_HASH_ORDER)
68 /* The number of bytes to hash when finding delta matches; also taken to be the
69 * minimum length of an explicit offset delta match */
70 #define NBYTES_HASHED_FOR_DELTA 3
72 /* The number of delta match powers to consider (must be <=
73 * LZMS_NUM_DELTA_POWER_SYMS) */
74 #define NUM_POWERS_TO_CONSIDER 6
76 /* This structure tracks the state of writing bits as a series of 16-bit coding
77 * units, starting at the end of the output buffer and proceeding backwards. */
78 struct lzms_output_bitstream {
80 /* Bits that haven't yet been written to the output buffer */
81 u64 bitbuf;
83 /* Number of bits currently held in @bitbuf */
84 unsigned bitcount;
86 /* Pointer to one past the next position in the output buffer at which
87 * to output a 16-bit coding unit */
88 le16 *next;
90 /* Pointer to the beginning of the output buffer (this is the "end" when
91 * writing backwards!) */
92 le16 *begin;
93 };
95 /* This structure tracks the state of range encoding and its output, which
96 * starts at the beginning of the output buffer and proceeds forwards. */
97 struct lzms_range_encoder {
99 /* The lower boundary of the current range. Logically, this is a 33-bit
100 * integer whose high bit is needed to detect carries. */
101 u64 lower_bound;
103 /* The size of the current range */
104 u32 range_size;
106 /* The next 16-bit coding unit to output */
107 u32 cache;
109 /* The number of 16-bit coding units whose output has been delayed due
110 * to possible carrying. The first such coding unit is @cache; all
111 * subsequent such coding units are 0xffff. */
112 u32 cache_size;
114 /* Pointer to the beginning of the output buffer */
115 le16 *begin;
117 /* Pointer to the position in the output buffer at which the next coding
118 * unit must be written */
119 le16 *next;
121 /* Pointer to just past the end of the output buffer */
122 le16 *end;
123 };
125 /* Bookkeeping information for an adaptive Huffman code */
126 struct lzms_huffman_rebuild_info {
128 * The remaining number of symbols to encode until this code must be
129 * rebuilt */
130 unsigned num_syms_until_rebuild;
132 /* The number of symbols in this code */
133 unsigned num_syms;
135 /* The rebuild frequency of this code, in symbols */
136 unsigned rebuild_freq;
138 /* The Huffman codeword of each symbol in this code */
139 u32 *codewords;
141 /* The length of each Huffman codeword, in bits */
142 u8 *lens;
144 /* The frequency of each symbol in this code */
145 u32 *freqs;
146 };
149 * The compressor-internal representation of a match or literal.
151 * Literals have length=1; matches have length > 1. (We disallow matches of
152 * length 1, even though this is a valid length in LZMS.)
154 * The source is encoded as follows:
156 * - Literals: the literal byte itself
157 * - Explicit offset LZ matches: the match offset plus (LZMS_NUM_LZ_REPS - 1)
158 * - Repeat offset LZ matches: the index of the offset in recent_lz_offsets
159 * - Explicit offset delta matches: DELTA_SOURCE_TAG is set, the next 3 bits are
160 * the power, and the remainder is the raw offset plus (LZMS_NUM_DELTA_REPS-1)
161 * - Repeat offset delta matches: DELTA_SOURCE_TAG is set, and the remainder is
162 * the index of the (power, raw_offset) pair in recent_delta_pairs
163 */
164 struct lzms_item {
165 u32 length;
166 u32 source;
167 };
169 #define DELTA_SOURCE_TAG ((u32)1 << 31)
170 #define DELTA_SOURCE_POWER_SHIFT 28
171 #define DELTA_SOURCE_RAW_OFFSET_MASK (((u32)1 << DELTA_SOURCE_POWER_SHIFT) - 1)
174 check_that_powers_fit_in_bitfield(void)
176 BUILD_BUG_ON(LZMS_NUM_DELTA_POWER_SYMS > (1 << (31 - DELTA_SOURCE_POWER_SHIFT)));
177 }
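/*
 * Illustrative sketch (not from the original source): how an explicit offset
 * delta match would be packed into and recovered from a 32-bit 'source' value
 * under the scheme described above. The helper names are hypothetical; the
 * real encoder does this arithmetic inline.
 */
static inline u32
lzms_example_pack_delta_source(u32 power, u32 raw_offset)
{
	/* Tag in bit 31, 3-bit power in bits 28..30, biased raw offset below */
	return DELTA_SOURCE_TAG | (power << DELTA_SOURCE_POWER_SHIFT) |
	       (raw_offset + (LZMS_NUM_DELTA_REPS - 1));
}

static inline void
lzms_example_unpack_delta_source(u32 source, u32 *power, u32 *raw_offset)
{
	source &= ~DELTA_SOURCE_TAG;
	*power = source >> DELTA_SOURCE_POWER_SHIFT;
	*raw_offset = (source & DELTA_SOURCE_RAW_OFFSET_MASK) -
		      (LZMS_NUM_DELTA_REPS - 1);
}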
179 /* A stripped-down version of the adaptive state in LZMS which excludes the
180 * probability entries and Huffman codes */
181 struct lzms_adaptive_state {
183 /* Recent offsets for LZ matches */
184 u32 recent_lz_offsets[LZMS_NUM_LZ_REPS + 1];
185 u32 prev_lz_offset; /* 0 means none */
186 u32 upcoming_lz_offset; /* 0 means none */
188 /* Recent (power, raw offset) pairs for delta matches.
189 * The low DELTA_SOURCE_POWER_SHIFT bits of each entry are the raw
190 * offset, and the high bits are the power. */
191 u32 recent_delta_pairs[LZMS_NUM_DELTA_REPS + 1];
192 u32 prev_delta_pair; /* 0 means none */
193 u32 upcoming_delta_pair; /* 0 means none */
195 /* States for predicting the probabilities of item types */
196 u8 main_state;
197 u8 match_state;
198 u8 lz_state;
199 u8 lz_rep_states[LZMS_NUM_LZ_REP_DECISIONS];
200 u8 delta_state;
201 u8 delta_rep_states[LZMS_NUM_DELTA_REP_DECISIONS];
202 } _aligned_attribute(64);
205 * This structure represents a byte position in the preprocessed input data and
206 * a node in the graph of possible match/literal choices.
208 * Logically, each incoming edge to this node is labeled with a literal or a
209 * match that can be taken to reach this position from an earlier position; and
210 * each outgoing edge from this node is labeled with a literal or a match that
211 * can be taken to advance from this position to a later position.
213 struct lzms_optimum_node {
216 * The cost of the lowest-cost path that has been found to reach this
217 * position. This can change as progressively lower cost paths are
218 * found to reach this position.
219 */
220 u32 cost;
221 #define INFINITE_COST UINT32_MAX
224 * @item is the last item that was taken to reach this position with the
225 * stored @cost. This can change as progressively lower
226 * cost paths are found to reach this position.
228 * In some cases we look ahead more than one item. If we looked ahead n
229 * items to reach this position, then @item is the last item taken,
230 * @extra_items contains the other items ordered from second-to-last to
231 * first, and @num_extra_items is n - 1.
233 unsigned num_extra_items;
234 struct lzms_item item;
235 struct lzms_item extra_items[2];
238 * The adaptive state that exists at this position. This is filled in
239 * lazily, only after the minimum-cost path to this position is found.
241 * Note: the way the algorithm handles this adaptive state in the
242 * "minimum-cost" parse is actually only an approximation. It's
243 * possible for the globally optimal, minimum cost path to contain a
244 * prefix, ending at a position, where that path prefix is *not* the
245 * minimum cost path to that position. This can happen if such a path
246 * prefix results in a different adaptive state which results in lower
247 * costs later. Although the algorithm does do some heuristic
248 * multi-item lookaheads, it does not solve this problem in general.
250 * Note: this adaptive state structure also does not include the
251 * probability entries or current Huffman codewords. Those aren't
252 * maintained per-position and are only updated occasionally.
254 struct lzms_adaptive_state state;
255 } _aligned_attribute(64);
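/*
 * Example (added for clarity): if the parser reached this position via the
 * three-item lookahead "LZ match M, then literal L, then LZ-rep0 match R",
 * then @item is R (the last item taken), @extra_items[0] is L,
 * @extra_items[1] is M, and @num_extra_items is 2.
 */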
257 /* The main compressor structure */
258 struct lzms_compressor {
260 /* The matchfinder for LZ matches */
261 struct lcpit_matchfinder mf;
263 /* The preprocessed buffer of data being compressed */
264 u8 *in_buffer;
266 /* The number of bytes of data to be compressed, which is the number of
267 * bytes of data in @in_buffer that are actually valid */
268 u32 in_nbytes;
271 * Boolean flags to enable consideration of various types of multi-step
272 * operations during parsing.
274 * Among other cases, multi-step operations can help with gaps where two
275 * matches are separated by a non-matching byte.
277 * This idea is borrowed from Igor Pavlov's LZMA encoder.
278 */
279 bool try_lit_lzrep0;
280 bool try_lzrep_lit_lzrep0;
281 bool try_lzmatch_lit_lzrep0;
284 * If true, the compressor can use delta matches. This slows down
285 * compression. It improves the compression ratio greatly, slightly, or
286 * not at all, depending on the input data.
288 bool use_delta_matches;
290 /* If true, the compressor need not preserve the input buffer if it
291 * compresses the data successfully. */
292 bool destructive;
294 /* 'last_target_usages' is a large array that is only needed for
295 * preprocessing, so it is in union with fields that don't need to be
296 * initialized until after preprocessing. */
297 union {
298 struct {
300 /* Temporary space to store matches found by the LZ matchfinder */
301 struct lz_match matches[MAX_FAST_LENGTH - LZMS_MIN_MATCH_LENGTH + 1];
303 /* Hash table for finding delta matches */
304 u32 delta_hash_table[DELTA_HASH_LENGTH];
306 /* For each delta power, the hash code for the next sequence */
307 u32 next_delta_hashes[NUM_POWERS_TO_CONSIDER];
309 /* The per-byte graph nodes for near-optimal parsing */
310 struct lzms_optimum_node optimum_nodes[NUM_OPTIM_NODES + MAX_FAST_LENGTH];
312 /* Table: length => current cost for small match lengths */
313 u32 fast_length_cost_tab[MAX_FAST_LENGTH + 1];
315 /* Range encoder which outputs to the beginning of the compressed data
316 * buffer, proceeding forwards */
317 struct lzms_range_encoder rc;
319 /* Bitstream which outputs to the end of the compressed data buffer,
320 * proceeding backwards */
321 struct lzms_output_bitstream os;
323 /* States and probability entries for item type disambiguation */
324 unsigned main_state;
325 unsigned match_state;
326 unsigned lz_state;
327 unsigned lz_rep_states[LZMS_NUM_LZ_REP_DECISIONS];
328 unsigned delta_state;
329 unsigned delta_rep_states[LZMS_NUM_DELTA_REP_DECISIONS];
330 struct lzms_probability_entry main_probs[LZMS_NUM_MAIN_PROBS];
331 struct lzms_probability_entry match_probs[LZMS_NUM_MATCH_PROBS];
332 struct lzms_probability_entry lz_probs[LZMS_NUM_LZ_PROBS];
333 struct lzms_probability_entry lz_rep_probs[LZMS_NUM_LZ_REP_DECISIONS]
334 [LZMS_NUM_LZ_REP_PROBS];
335 struct lzms_probability_entry delta_probs[LZMS_NUM_DELTA_PROBS];
336 struct lzms_probability_entry delta_rep_probs[LZMS_NUM_DELTA_REP_DECISIONS]
337 [LZMS_NUM_DELTA_REP_PROBS];
341 struct lzms_huffman_rebuild_info literal_rebuild_info;
342 u32 literal_codewords[LZMS_NUM_LITERAL_SYMS];
343 u8 literal_lens[LZMS_NUM_LITERAL_SYMS];
344 u32 literal_freqs[LZMS_NUM_LITERAL_SYMS];
346 struct lzms_huffman_rebuild_info lz_offset_rebuild_info;
347 u32 lz_offset_codewords[LZMS_MAX_NUM_OFFSET_SYMS];
348 u8 lz_offset_lens[LZMS_MAX_NUM_OFFSET_SYMS];
349 u32 lz_offset_freqs[LZMS_MAX_NUM_OFFSET_SYMS];
351 struct lzms_huffman_rebuild_info length_rebuild_info;
352 u32 length_codewords[LZMS_NUM_LENGTH_SYMS];
353 u8 length_lens[LZMS_NUM_LENGTH_SYMS];
354 u32 length_freqs[LZMS_NUM_LENGTH_SYMS];
356 struct lzms_huffman_rebuild_info delta_offset_rebuild_info;
357 u32 delta_offset_codewords[LZMS_MAX_NUM_OFFSET_SYMS];
358 u8 delta_offset_lens[LZMS_MAX_NUM_OFFSET_SYMS];
359 u32 delta_offset_freqs[LZMS_MAX_NUM_OFFSET_SYMS];
361 struct lzms_huffman_rebuild_info delta_power_rebuild_info;
362 u32 delta_power_codewords[LZMS_NUM_DELTA_POWER_SYMS];
363 u8 delta_power_lens[LZMS_NUM_DELTA_POWER_SYMS];
364 u32 delta_power_freqs[LZMS_NUM_DELTA_POWER_SYMS];
366 };
368 s32 last_target_usages[65536];
370 };
372 /* Table: length => length slot for small match lengths */
373 u8 fast_length_slot_tab[MAX_FAST_LENGTH + 1];
375 /* Tables for mapping offsets to offset slots */
377 /* slots [0, 167); 0 <= num_extra_bits <= 10 */
378 u8 offset_slot_tab_1[0xe4a5];
380 /* slots [167, 427); 11 <= num_extra_bits <= 15 */
381 u16 offset_slot_tab_2[0x3d0000 >> 11];
383 /* slots [427, 799); 16 <= num_extra_bits */
384 u16 offset_slot_tab_3[((LZMS_MAX_MATCH_OFFSET + 1) - 0xe4a5) >> 16];
385 };
387 /******************************************************************************
388 * Offset and length slot acceleration *
389 ******************************************************************************/
391 /* Generate the acceleration table for length slots. */
393 lzms_init_fast_length_slot_tab(struct lzms_compressor *c)
395 unsigned slot = 0;
396 for (u32 len = LZMS_MIN_MATCH_LENGTH; len <= MAX_FAST_LENGTH; len++) {
397 if (len >= lzms_length_slot_base[slot + 1])
398 slot++;
399 c->fast_length_slot_tab[len] = slot;
403 /* Generate the acceleration tables for offset slots. */
405 lzms_init_offset_slot_tabs(struct lzms_compressor *c)
407 u32 offset;
408 unsigned slot = 0;
410 /* slots [0, 167); 0 <= num_extra_bits <= 10 */
411 for (offset = 1; offset < 0xe4a5; offset++) {
412 if (offset >= lzms_offset_slot_base[slot + 1])
413 slot++;
414 c->offset_slot_tab_1[offset] = slot;
417 /* slots [167, 427); 11 <= num_extra_bits <= 15 */
418 for (; offset < 0x3de4a5; offset += (u32)1 << 11) {
419 if (offset >= lzms_offset_slot_base[slot + 1])
420 slot++;
421 c->offset_slot_tab_2[(offset - 0xe4a5) >> 11] = slot;
424 /* slots [427, 799); 16 <= num_extra_bits */
425 for (; offset < LZMS_MAX_MATCH_OFFSET + 1; offset += (u32)1 << 16) {
426 if (offset >= lzms_offset_slot_base[slot + 1])
427 slot++;
428 c->offset_slot_tab_3[(offset - 0xe4a5) >> 16] = slot;
433 * Return the length slot for the specified match length, using the compressor's
434 * acceleration table if the length is small enough.
436 static inline unsigned
437 lzms_comp_get_length_slot(const struct lzms_compressor *c, u32 length)
439 if (likely(length <= MAX_FAST_LENGTH))
440 return c->fast_length_slot_tab[length];
441 return lzms_get_length_slot(length);
445 * Return the offset slot for the specified match offset, using the compressor's
446 * acceleration tables to speed up the mapping.
448 static inline unsigned
449 lzms_comp_get_offset_slot(const struct lzms_compressor *c, u32 offset)
451 if (offset < 0xe4a5)
452 return c->offset_slot_tab_1[offset];
453 offset -= 0xe4a5;
454 if (offset < 0x3d0000)
455 return c->offset_slot_tab_2[offset >> 11];
456 return c->offset_slot_tab_3[offset >> 16];
457 }
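/*
 * Worked example (added for clarity): offset 0x1234 is below 0xe4a5 and
 * indexes 'offset_slot_tab_1' directly. Offset 0x10000 is adjusted to
 * 0x10000 - 0xe4a5 = 0x1b5b, which is below 0x3d0000, so its slot is
 * 'offset_slot_tab_2[0x1b5b >> 11]', i.e. entry 3. The coarse granularities
 * (2^11 and 2^16 offsets per entry) are safe because every slot in those
 * ranges covers at least that many consecutive offsets (num_extra_bits is at
 * least 11 and 16, respectively).
 */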
459 /******************************************************************************
460 * Range encoding *
461 ******************************************************************************/
464 * Initialize the range encoder @rc to write forwards to the specified buffer
465 * @out that is @count 16-bit integers long.
468 lzms_range_encoder_init(struct lzms_range_encoder *rc, le16 *out, size_t count)
470 rc->lower_bound = 0;
471 rc->range_size = 0xffffffff;
472 rc->cache = 0;
473 rc->cache_size = 1;
474 rc->begin = out;
475 rc->next = out - 1;
476 rc->end = out + count;
480 * Attempt to flush bits from the range encoder.
482 * The basic idea is that we're writing bits from @rc->lower_bound to the
483 * output. However, due to carrying, the writing of coding units with the
484 * maximum value, as well as one prior coding unit, must be delayed until it is
485 * determined whether a carry is needed.
487 * This is based on the public domain code for LZMA written by Igor Pavlov, but
488 * with the following differences:
490 * - In LZMS, 16-bit coding units are required rather than 8-bit.
492 * - In LZMS, the first coding unit is not ignored by the decompressor, so
493 * the encoder cannot output a dummy value to that position.
496 lzms_range_encoder_shift_low(struct lzms_range_encoder *rc)
498 if ((u32)(rc->lower_bound) < 0xffff0000 ||
499 (u32)(rc->lower_bound >> 32) != 0)
501 /* Carry not needed (rc->lower_bound < 0xffff0000), or carry
502 * occurred ((rc->lower_bound >> 32) != 0, a.k.a. the carry bit
503 * is 1). */
504 do {
505 if (likely(rc->next >= rc->begin)) {
506 if (rc->next != rc->end) {
507 put_unaligned_u16_le(rc->cache +
508 (u16)(rc->lower_bound >> 32),
509 rc->next++);
510 }
511 } else {
512 rc->next++;
513 }
514 rc->cache = 0xffff;
515 } while (--rc->cache_size != 0);
517 rc->cache = (rc->lower_bound >> 16) & 0xffff;
518 }
519 rc->cache_size++;
520 rc->lower_bound = (rc->lower_bound & 0xffff) << 16;
524 lzms_range_encoder_flush(struct lzms_range_encoder *rc)
526 for (int i = 0; i < 4; i++)
527 lzms_range_encoder_shift_low(rc);
528 return rc->next != rc->end;
532 * Encode the next bit using the range encoder.
534 * @prob is the probability out of LZMS_PROBABILITY_DENOMINATOR that the next
535 * bit is 0 rather than 1.
538 lzms_range_encode_bit(struct lzms_range_encoder *rc, int bit, u32 prob)
540 /* Normalize if needed. */
541 if (rc->range_size <= 0xffff) {
542 rc->range_size <<= 16;
543 lzms_range_encoder_shift_low(rc);
544 }
546 u32 bound = (rc->range_size >> LZMS_PROBABILITY_BITS) * prob;
547 if (bit == 0) {
548 rc->range_size = bound;
549 } else {
550 rc->lower_bound += bound;
551 rc->range_size -= bound;
552 }
553 }
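/*
 * Worked example (added for clarity, assuming LZMS_PROBABILITY_BITS is 6,
 * i.e. a denominator of 64): the coder splits the current range
 * [lower_bound, lower_bound + range_size) in proportion to the probability of
 * a 0 bit. With range_size = 0x10000000 and prob = 48 (48/64 = 75%),
 * bound = (0x10000000 >> 6) * 48 = 0xC000000. Encoding a 0 keeps the low
 * subrange of size 0xC000000; encoding a 1 adds 0xC000000 to lower_bound and
 * keeps the remaining 0x4000000.
 */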
556 * Encode a bit. This wraps around lzms_range_encode_bit() to handle using and
557 * updating the state and its corresponding probability entry.
560 lzms_encode_bit(int bit, unsigned *state_p, unsigned num_states,
561 struct lzms_probability_entry *probs,
562 struct lzms_range_encoder *rc)
564 struct lzms_probability_entry *prob_entry;
565 u32 prob;
567 /* Load the probability entry for the current state. */
568 prob_entry = &probs[*state_p];
570 /* Update the state based on the next bit. */
571 *state_p = ((*state_p << 1) | bit) & (num_states - 1);
573 /* Get the probability that the bit is 0. */
574 prob = lzms_get_probability(prob_entry);
576 /* Update the probability entry. */
577 lzms_update_probability_entry(prob_entry, bit);
579 /* Encode the bit using the range encoder. */
580 lzms_range_encode_bit(rc, bit, prob);
583 /* Helper functions for encoding bits in the various decision classes */
586 lzms_encode_main_bit(struct lzms_compressor *c, int bit)
588 lzms_encode_bit(bit, &c->main_state, LZMS_NUM_MAIN_PROBS,
589 c->main_probs, &c->rc);
593 lzms_encode_match_bit(struct lzms_compressor *c, int bit)
595 lzms_encode_bit(bit, &c->match_state, LZMS_NUM_MATCH_PROBS,
596 c->match_probs, &c->rc);
600 lzms_encode_lz_bit(struct lzms_compressor *c, int bit)
602 lzms_encode_bit(bit, &c->lz_state, LZMS_NUM_LZ_PROBS,
603 c->lz_probs, &c->rc);
607 lzms_encode_lz_rep_bit(struct lzms_compressor *c, int bit, int idx)
609 lzms_encode_bit(bit, &c->lz_rep_states[idx], LZMS_NUM_LZ_REP_PROBS,
610 c->lz_rep_probs[idx], &c->rc);
614 lzms_encode_delta_bit(struct lzms_compressor *c, int bit)
616 lzms_encode_bit(bit, &c->delta_state, LZMS_NUM_DELTA_PROBS,
617 c->delta_probs, &c->rc);
621 lzms_encode_delta_rep_bit(struct lzms_compressor *c, int bit, int idx)
623 lzms_encode_bit(bit, &c->delta_rep_states[idx], LZMS_NUM_DELTA_REP_PROBS,
624 c->delta_rep_probs[idx], &c->rc);
627 /******************************************************************************
628 * Huffman encoding and verbatim bits *
629 ******************************************************************************/
632 * Initialize the output bitstream @os to write backwards to the specified
633 * buffer @out that is @count 16-bit integers long.
636 lzms_output_bitstream_init(struct lzms_output_bitstream *os,
637 le16 *out, size_t count)
639 os->bitbuf = 0;
640 os->bitcount = 0;
641 os->next = out + count;
642 os->begin = out;
646 * Write some bits, contained in the low-order @num_bits bits of @bits, to the
647 * output bitstream @os.
649 * @max_num_bits is a compile-time constant that specifies the maximum number of
650 * bits that can ever be written at this call site.
653 lzms_write_bits(struct lzms_output_bitstream *os, const u32 bits,
654 const unsigned num_bits, const unsigned max_num_bits)
656 /* Add the bits to the bit buffer variable. */
657 os->bitcount += num_bits;
658 os->bitbuf = (os->bitbuf << num_bits) | bits;
660 /* Check whether any coding units need to be written. */
661 while (os->bitcount >= 16) {
663 os->bitcount -= 16;
665 /* Write a coding unit, unless it would underflow the buffer. */
666 if (os->next != os->begin)
667 put_unaligned_u16_le(os->bitbuf >> os->bitcount, --os->next);
669 /* Optimization for call sites that never write more than 16
670 * bits at once. */
671 if (max_num_bits <= 16)
672 break;
677 * Flush the output bitstream, ensuring that all bits written to it have been
678 * written to memory. Return %true if all bits have been output successfully,
679 * or %false if an overrun occurred.
682 lzms_output_bitstream_flush(struct lzms_output_bitstream *os)
684 if (os->next == os->begin)
685 return false;
687 if (os->bitcount != 0)
688 put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), --os->next);
690 return true;
694 lzms_build_huffman_code(struct lzms_huffman_rebuild_info *rebuild_info)
696 make_canonical_huffman_code(rebuild_info->num_syms,
697 LZMS_MAX_CODEWORD_LENGTH,
698 rebuild_info->freqs,
699 rebuild_info->lens,
700 rebuild_info->codewords);
701 rebuild_info->num_syms_until_rebuild = rebuild_info->rebuild_freq;
705 lzms_init_huffman_code(struct lzms_huffman_rebuild_info *rebuild_info,
706 unsigned num_syms, unsigned rebuild_freq,
707 u32 *codewords, u8 *lens, u32 *freqs)
709 rebuild_info->num_syms = num_syms;
710 rebuild_info->rebuild_freq = rebuild_freq;
711 rebuild_info->codewords = codewords;
712 rebuild_info->lens = lens;
713 rebuild_info->freqs = freqs;
714 lzms_init_symbol_frequencies(freqs, num_syms);
715 lzms_build_huffman_code(rebuild_info);
719 lzms_rebuild_huffman_code(struct lzms_huffman_rebuild_info *rebuild_info)
721 lzms_build_huffman_code(rebuild_info);
722 lzms_dilute_symbol_frequencies(rebuild_info->freqs, rebuild_info->num_syms);
723 }
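/*
 * Note (added for clarity): this gives the following rebuild cycle. Each
 * encoded symbol increments its frequency; after 'rebuild_freq' symbols the
 * canonical code is regenerated from the accumulated frequencies, which are
 * then diluted so that more recent statistics dominate the next period. The
 * decompressor performs the identical rebuilds at the identical symbol
 * counts, so the two codes never go out of sync.
 */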
726 * Encode a symbol using the specified Huffman code. Then, if the Huffman code
727 * needs to be rebuilt, rebuild it and return true; otherwise return false.
730 lzms_huffman_encode_symbol(unsigned sym,
731 const u32 *codewords, const u8 *lens, u32 *freqs,
732 struct lzms_output_bitstream *os,
733 struct lzms_huffman_rebuild_info *rebuild_info)
735 lzms_write_bits(os, codewords[sym], lens[sym], LZMS_MAX_CODEWORD_LENGTH);
736 ++freqs[sym];
737 if (--rebuild_info->num_syms_until_rebuild == 0) {
738 lzms_rebuild_huffman_code(rebuild_info);
739 return true;
740 }
741 return false;
744 /* Helper routines to encode symbols using the various Huffman codes */
747 lzms_encode_literal_symbol(struct lzms_compressor *c, unsigned sym)
749 return lzms_huffman_encode_symbol(sym, c->literal_codewords,
750 c->literal_lens, c->literal_freqs,
751 &c->os, &c->literal_rebuild_info);
755 lzms_encode_lz_offset_symbol(struct lzms_compressor *c, unsigned sym)
757 return lzms_huffman_encode_symbol(sym, c->lz_offset_codewords,
758 c->lz_offset_lens, c->lz_offset_freqs,
759 &c->os, &c->lz_offset_rebuild_info);
763 lzms_encode_length_symbol(struct lzms_compressor *c, unsigned sym)
765 return lzms_huffman_encode_symbol(sym, c->length_codewords,
766 c->length_lens, c->length_freqs,
767 &c->os, &c->length_rebuild_info);
771 lzms_encode_delta_offset_symbol(struct lzms_compressor *c, unsigned sym)
773 return lzms_huffman_encode_symbol(sym, c->delta_offset_codewords,
774 c->delta_offset_lens, c->delta_offset_freqs,
775 &c->os, &c->delta_offset_rebuild_info);
779 lzms_encode_delta_power_symbol(struct lzms_compressor *c, unsigned sym)
781 return lzms_huffman_encode_symbol(sym, c->delta_power_codewords,
782 c->delta_power_lens, c->delta_power_freqs,
783 &c->os, &c->delta_power_rebuild_info);
787 lzms_update_fast_length_costs(struct lzms_compressor *c);
790 * Encode a match length. If this causes the Huffman code for length symbols to
791 * be rebuilt, also update the length costs array used by the parser.
794 lzms_encode_length(struct lzms_compressor *c, u32 length)
796 unsigned slot = lzms_comp_get_length_slot(c, length);
798 if (lzms_encode_length_symbol(c, slot))
799 lzms_update_fast_length_costs(c);
801 lzms_write_bits(&c->os, length - lzms_length_slot_base[slot],
802 lzms_extra_length_bits[slot],
803 LZMS_MAX_EXTRA_LENGTH_BITS);
806 /* Encode the offset of an LZ match. */
808 lzms_encode_lz_offset(struct lzms_compressor *c, u32 offset)
810 unsigned slot = lzms_comp_get_offset_slot(c, offset);
812 lzms_encode_lz_offset_symbol(c, slot);
813 lzms_write_bits(&c->os, offset - lzms_offset_slot_base[slot],
814 lzms_extra_offset_bits[slot],
815 LZMS_MAX_EXTRA_OFFSET_BITS);
816 }
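/*
 * Worked example (added for clarity): if 'offset' falls in slot 's' with base
 * B = lzms_offset_slot_base[s] and k = lzms_extra_offset_bits[s] extra bits,
 * then B <= offset < B + 2^k, and the encoding is the Huffman symbol for 's'
 * followed by the k-bit value (offset - B) written as verbatim bits.
 */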
818 /* Encode the raw offset of a delta match. */
820 lzms_encode_delta_raw_offset(struct lzms_compressor *c, u32 raw_offset)
822 unsigned slot = lzms_comp_get_offset_slot(c, raw_offset);
824 lzms_encode_delta_offset_symbol(c, slot);
825 lzms_write_bits(&c->os, raw_offset - lzms_offset_slot_base[slot],
826 lzms_extra_offset_bits[slot],
827 LZMS_MAX_EXTRA_OFFSET_BITS);
830 /******************************************************************************
831 * Item encoding *
832 ******************************************************************************/
834 /* Encode the specified item, which may be a literal or any type of match. */
836 lzms_encode_item(struct lzms_compressor *c, u32 length, u32 source)
838 /* Main bit: 0 = literal, 1 = match */
839 int main_bit = (length > 1);
840 lzms_encode_main_bit(c, main_bit);
842 if (!main_bit) {
843 /* Literal */
844 unsigned literal = source;
845 lzms_encode_literal_symbol(c, literal);
846 } else {
847 /* Match */
849 /* Match bit: 0 = LZ match, 1 = delta match */
850 int match_bit = (source & DELTA_SOURCE_TAG) != 0;
851 lzms_encode_match_bit(c, match_bit);
853 if (!match_bit) {
854 /* LZ match */
856 /* LZ bit: 0 = explicit offset, 1 = repeat offset */
857 int lz_bit = (source < LZMS_NUM_LZ_REPS);
858 lzms_encode_lz_bit(c, lz_bit);
860 if (!lz_bit) {
861 /* Explicit offset LZ match */
862 u32 offset = source - (LZMS_NUM_LZ_REPS - 1);
863 lzms_encode_lz_offset(c, offset);
864 } else {
865 /* Repeat offset LZ match */
866 int rep_idx = source;
867 for (int i = 0; i < rep_idx; i++)
868 lzms_encode_lz_rep_bit(c, 1, i);
869 if (rep_idx < LZMS_NUM_LZ_REP_DECISIONS)
870 lzms_encode_lz_rep_bit(c, 0, rep_idx);
871 }
872 } else {
873 /* Delta match */
875 source &= ~DELTA_SOURCE_TAG;
877 /* Delta bit: 0 = explicit offset, 1 = repeat offset */
878 int delta_bit = (source < LZMS_NUM_DELTA_REPS);
879 lzms_encode_delta_bit(c, delta_bit);
881 if (!delta_bit) {
882 /* Explicit offset delta match */
883 u32 power = source >> DELTA_SOURCE_POWER_SHIFT;
884 u32 raw_offset = (source & DELTA_SOURCE_RAW_OFFSET_MASK) -
885 (LZMS_NUM_DELTA_REPS - 1);
886 lzms_encode_delta_power_symbol(c, power);
887 lzms_encode_delta_raw_offset(c, raw_offset);
888 } else {
889 /* Repeat offset delta match */
890 int rep_idx = source;
891 for (int i = 0; i < rep_idx; i++)
892 lzms_encode_delta_rep_bit(c, 1, i);
893 if (rep_idx < LZMS_NUM_DELTA_REP_DECISIONS)
894 lzms_encode_delta_rep_bit(c, 0, rep_idx);
895 }
896 }
898 /* Match length (encoded the same way for any match type) */
899 lzms_encode_length(c, length);
900 }
903 /* Encode a list of matches and literals chosen by the parsing algorithm. */
905 lzms_encode_nonempty_item_list(struct lzms_compressor *c,
906 struct lzms_optimum_node *end_node)
908 /* Since we've stored at each node the item we took to arrive at that
909 * node, we can trace our chosen path in backwards order. However, for
910 * encoding we need to trace our chosen path in forwards order. To make
911 * this possible, the following loop moves the items from their
912 * destination nodes to their source nodes, which effectively reverses
913 * the path. (Think of it like reversing a singly-linked list.) */
914 struct lzms_optimum_node *cur_node = end_node;
915 struct lzms_item saved_item = cur_node->item;
916 do {
917 struct lzms_item item = saved_item;
918 if (cur_node->num_extra_items > 0) {
919 /* Handle an arrival via multi-item lookahead. */
920 unsigned i = 0;
921 struct lzms_optimum_node *orig_node = cur_node;
922 do {
923 cur_node -= item.length;
924 cur_node->item = item;
925 item = orig_node->extra_items[i];
926 } while (++i != orig_node->num_extra_items);
927 }
928 cur_node -= item.length;
929 saved_item = cur_node->item;
930 cur_node->item = item;
931 } while (cur_node != c->optimum_nodes);
933 /* Now trace the chosen path in forwards order, encoding each item. */
934 do {
935 lzms_encode_item(c, cur_node->item.length, cur_node->item.source);
936 cur_node += cur_node->item.length;
937 } while (cur_node != end_node);
938 }
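/*
 * Example trace (added for clarity): suppose the chosen path is A (length 3),
 * B (length 2), C (length 4), stored at nodes 3, 5, and 9 respectively (each
 * item lives at its destination node). The reversal loop moves A to node 0,
 * B to node 3, and C to node 5, after which the forward pass encodes A, B, C
 * in order while advancing by each item's length.
 */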
941 lzms_encode_item_list(struct lzms_compressor *c,
942 struct lzms_optimum_node *end_node)
944 if (end_node != c->optimum_nodes)
945 lzms_encode_nonempty_item_list(c, end_node);
948 /******************************************************************************
949 * Cost evaluation *
950 ******************************************************************************/
953 * If p is the predicted probability of the next bit being a 0, then the number
954 * of bits required to encode a 0 bit using a binary range encoder is the real
955 * number -log2(p), and the number of bits required to encode a 1 bit is the
956 * real number -log2(1 - p). To avoid computing either of these expressions at
957 * runtime, 'lzms_bit_costs' is a precomputed table that stores a mapping from
958 * probability to cost for each possible probability. Specifically, the array
959 * indices are the numerators of the possible probabilities in LZMS, where the
960 * denominators are LZMS_PROBABILITY_DENOMINATOR; and the stored costs are the
961 * bit costs multiplied by 1<<COST_SHIFT and rounded to the nearest integer.
962 * Furthermore, the values stored for 0% and 100% probabilities are equal to the
963 * adjacent values, since these probabilities are not actually permitted. This
964 * allows us to use the num_recent_zero_bits value from the
965 * lzms_probability_entry as the array index without fixing up these two special
966 * cases. */
968 static const u32 lzms_bit_costs[LZMS_PROBABILITY_DENOMINATOR + 1] = {
969 384, 384, 320, 283, 256, 235, 219, 204,
970 192, 181, 171, 163, 155, 147, 140, 134,
971 128, 122, 117, 112, 107, 103, 99, 94,
972 91, 87, 83, 80, 76, 73, 70, 67,
973 64, 61, 58, 56, 53, 51, 48, 46,
974 43, 41, 39, 37, 35, 33, 30, 29,
975 27, 25, 23, 21, 19, 17, 16, 14,
976 12, 11, 9, 8, 6, 4, 3, 1,
977 };
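/* Worked example (added for clarity): index 32 is probability 32/64 = 50% and
 * stores 64, exactly 1.0 bit times (1 << COST_SHIFT); index 16 (25%) stores
 * 128, exactly 2.0 bits; index 48 (75%) stores 27, about 0.42 bits. */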
981 check_cost_shift(void)
983 /* lzms_bit_costs is hard-coded to the current COST_SHIFT. */
984 BUILD_BUG_ON(COST_SHIFT != 6);
988 #if 0
989 #include <math.h>
990 static void
991 lzms_compute_bit_costs(void)
993 for (u32 i = 0; i <= LZMS_PROBABILITY_DENOMINATOR; i++) {
994 u32 prob = i;
995 if (prob == 0)
996 prob++;
997 else if (prob == LZMS_PROBABILITY_DENOMINATOR)
998 prob--;
1000 lzms_bit_costs[i] = round(-log2((double)prob / LZMS_PROBABILITY_DENOMINATOR) *
1001 (1 << COST_SHIFT));
1002 }
1003 }
1004 #endif
1006 /* Return the cost to encode a 0 bit in the specified context. */
1008 lzms_bit_0_cost(unsigned state, const struct lzms_probability_entry *probs)
1010 return lzms_bit_costs[probs[state].num_recent_zero_bits];
1013 /* Return the cost to encode a 1 bit in the specified context. */
1015 lzms_bit_1_cost(unsigned state, const struct lzms_probability_entry *probs)
1017 return lzms_bit_costs[LZMS_PROBABILITY_DENOMINATOR -
1018 probs[state].num_recent_zero_bits];
1021 /* Return the cost to encode a literal, including the main bit. */
1023 lzms_literal_cost(struct lzms_compressor *c, unsigned main_state, unsigned literal)
1025 return lzms_bit_0_cost(main_state, c->main_probs) +
1026 ((u32)c->literal_lens[literal] << COST_SHIFT);
1029 /* Update 'fast_length_cost_tab' to use the latest Huffman code. */
1031 lzms_update_fast_length_costs(struct lzms_compressor *c)
1033 int slot = -1;
1034 u32 cost = 0;
1035 for (u32 len = LZMS_MIN_MATCH_LENGTH; len <= MAX_FAST_LENGTH; len++) {
1036 if (len >= lzms_length_slot_base[slot + 1]) {
1037 slot++;
1038 cost = (u32)(c->length_lens[slot] +
1039 lzms_extra_length_bits[slot]) << COST_SHIFT;
1040 }
1041 c->fast_length_cost_tab[len] = cost;
1045 /* Return the cost to encode the specified match length, which must not exceed
1046 * MAX_FAST_LENGTH. */
1048 lzms_fast_length_cost(const struct lzms_compressor *c, u32 length)
1050 return c->fast_length_cost_tab[length];
1053 /* Return the cost to encode the specified LZ match offset. */
1055 lzms_lz_offset_cost(const struct lzms_compressor *c, u32 offset)
1057 unsigned slot = lzms_comp_get_offset_slot(c, offset);
1058 u32 num_bits = c->lz_offset_lens[slot] + lzms_extra_offset_bits[slot];
1059 return num_bits << COST_SHIFT;
1062 /* Return the cost to encode the specified delta power and raw offset. */
1064 lzms_delta_source_cost(const struct lzms_compressor *c, u32 power, u32 raw_offset)
1066 unsigned slot = lzms_comp_get_offset_slot(c, raw_offset);
1067 u32 num_bits = c->delta_power_lens[power] + c->delta_offset_lens[slot] +
1068 lzms_extra_offset_bits[slot];
1069 return num_bits << COST_SHIFT;
1072 /******************************************************************************
1073 * Adaptive state *
1074 ******************************************************************************/
1077 lzms_init_adaptive_state(struct lzms_adaptive_state *state)
1079 for (int i = 0; i < LZMS_NUM_LZ_REPS + 1; i++)
1080 state->recent_lz_offsets[i] = i + 1;
1081 state->prev_lz_offset = 0;
1082 state->upcoming_lz_offset = 0;
1084 for (int i = 0; i < LZMS_NUM_DELTA_REPS + 1; i++)
1085 state->recent_delta_pairs[i] = i + 1;
1086 state->prev_delta_pair = 0;
1087 state->upcoming_delta_pair = 0;
1089 state->main_state = 0;
1090 state->match_state = 0;
1091 state->lz_state = 0;
1092 for (int i = 0; i < LZMS_NUM_LZ_REP_DECISIONS; i++)
1093 state->lz_rep_states[i] = 0;
1094 state->delta_state = 0;
1095 for (int i = 0; i < LZMS_NUM_DELTA_REP_DECISIONS; i++)
1096 state->delta_rep_states[i] = 0;
1100 * Update the LRU queues for match sources when advancing by one item.
1102 * Note: using LZMA as a point of comparison, the LRU queues in LZMS are more
1103 * complex because:
1104 * - there are separate queues for LZ and delta matches
1105 * - updates to the queues are delayed by one encoded item (this prevents
1106 * sources from being bumped up to index 0 too early)
1109 lzms_update_lru_queues(struct lzms_adaptive_state *state)
1111 if (state->prev_lz_offset != 0) {
1112 for (int i = LZMS_NUM_LZ_REPS - 1; i >= 0; i--)
1113 state->recent_lz_offsets[i + 1] = state->recent_lz_offsets[i];
1114 state->recent_lz_offsets[0] = state->prev_lz_offset;
1116 state->prev_lz_offset = state->upcoming_lz_offset;
1118 if (state->prev_delta_pair != 0) {
1119 for (int i = LZMS_NUM_DELTA_REPS - 1; i >= 0; i--)
1120 state->recent_delta_pairs[i + 1] = state->recent_delta_pairs[i];
1121 state->recent_delta_pairs[0] = state->prev_delta_pair;
1123 state->prev_delta_pair = state->upcoming_delta_pair;
1124 }
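/*
 * Example (added for clarity): when an LZ match with offset X is chosen,
 * 'upcoming_lz_offset' is set to X and the update for that item moves it into
 * 'prev_lz_offset'; the update for the following item then inserts it at
 * 'recent_lz_offsets[0]'. So X becomes usable as a repeat offset with a
 * one-item delay, which is why the lit + LZ-rep0 heuristic elsewhere in this
 * file consults 'prev_lz_offset' before 'recent_lz_offsets[0]'.
 */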
1127 lzms_update_state(u8 *state_p, int bit, unsigned num_states)
1129 *state_p = ((*state_p << 1) | bit) % num_states;
1130 }
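/* Example (added for clarity): because num_states is a power of 2, the state
 * is just the last log2(num_states) bits coded in this decision class; after
 * coding bits 1, 0, 1 from state 0, the state is 0b101 = 5 (assuming
 * num_states >= 8). */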
1133 lzms_update_main_state(struct lzms_adaptive_state *state, int is_match)
1135 lzms_update_state(&state->main_state, is_match, LZMS_NUM_MAIN_PROBS);
1139 lzms_update_match_state(struct lzms_adaptive_state *state, int is_delta)
1141 lzms_update_state(&state->match_state, is_delta, LZMS_NUM_MATCH_PROBS);
1145 lzms_update_lz_state(struct lzms_adaptive_state *state, int is_rep)
1147 lzms_update_state(&state->lz_state, is_rep, LZMS_NUM_LZ_PROBS);
1151 lzms_update_lz_rep_states(struct lzms_adaptive_state *state, int rep_idx)
1153 for (int i = 0; i < rep_idx; i++)
1154 lzms_update_state(&state->lz_rep_states[i], 1, LZMS_NUM_LZ_REP_PROBS);
1156 if (rep_idx < LZMS_NUM_LZ_REP_DECISIONS)
1157 lzms_update_state(&state->lz_rep_states[rep_idx], 0, LZMS_NUM_LZ_REP_PROBS);
1161 lzms_update_delta_state(struct lzms_adaptive_state *state, int is_rep)
1163 lzms_update_state(&state->delta_state, is_rep, LZMS_NUM_DELTA_PROBS);
1167 lzms_update_delta_rep_states(struct lzms_adaptive_state *state, int rep_idx)
1169 for (int i = 0; i < rep_idx; i++)
1170 lzms_update_state(&state->delta_rep_states[i], 1, LZMS_NUM_DELTA_REP_PROBS);
1172 if (rep_idx < LZMS_NUM_DELTA_REP_DECISIONS)
1173 lzms_update_state(&state->delta_rep_states[rep_idx], 0, LZMS_NUM_DELTA_REP_PROBS);
1176 /******************************************************************************
1177 * Matchfinding *
1178 ******************************************************************************/
1180 /* Note: this code just handles finding delta matches. The code for finding LZ
1181 * matches is elsewhere. */
1184 /* Initialize the delta matchfinder for a new input buffer. */
1186 lzms_init_delta_matchfinder(struct lzms_compressor *c)
1188 /* Set all entries to use an invalid power, which will never match. */
1189 BUILD_BUG_ON(NUM_POWERS_TO_CONSIDER >= (1 << (32 - DELTA_SOURCE_POWER_SHIFT)));
1190 memset(c->delta_hash_table, 0xFF, sizeof(c->delta_hash_table));
1192 /* Initialize the next hash code for each power. We can just use zeroes
1193 * initially; it doesn't really matter. */
1194 for (u32 i = 0; i < NUM_POWERS_TO_CONSIDER; i++)
1195 c->next_delta_hashes[i] = 0;
1199 * Compute a DELTA_HASH_ORDER-bit hash code for the first
1200 * NBYTES_HASHED_FOR_DELTA bytes of the sequence beginning at @p when taken in a
1201 * delta context with the specified @span.
1204 lzms_delta_hash(const u8 *p, u32 span)
1206 /* A delta match has a certain span and an offset that is a multiple of
1207 * that span. To reduce wasted space we use a single combined hash
1208 * table for all spans and positions, but to minimize collisions we
1209 * include in the hash code computation the span and the low-order bits
1210 * of the current position. */
1212 BUILD_BUG_ON(NBYTES_HASHED_FOR_DELTA != 3);
1213 u8 d0 = *(p + 0) - *(p + 0 - span);
1214 u8 d1 = *(p + 1) - *(p + 1 - span);
1215 u8 d2 = *(p + 2) - *(p + 2 - span);
1216 u32 v = ((span + ((u32)(uintptr_t)p & (span - 1))) << 24) |
1217 ((u32)d2 << 16) | ((u32)d1 << 8) | d0;
1218 return lz_hash(v, DELTA_HASH_ORDER);
1222 * Given a match between @in_next and @matchptr in a delta context with the
1223 * specified @span and having the initial @len, extend the match as far as
1224 * possible, up to a limit of @max_len.
1227 lzms_extend_delta_match(const u8 *in_next, const u8 *matchptr,
1228 u32 len, u32 max_len, u32 span)
1230 while (len < max_len &&
1231 (u8)(*(in_next + len) - *(in_next + len - span)) ==
1232 (u8)(*(matchptr + len) - *(matchptr + len - span)))
1234 len++;
1236 return len;
1237 }
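/*
 * Worked example (added for clarity): with power = 0 (span = 1), the byte
 * sequence 10,12,14,16,18,20 contains a delta match between positions 4 and 1:
 * the first-order deltas are all 2, so in[i] - in[i-1] equals
 * in[i-3] - in[i-3-1] at each extension step even though the literal bytes
 * differ.
 */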
1240 lzms_delta_matchfinder_skip_bytes(struct lzms_compressor *c,
1241 const u8 *in_next, u32 count)
1243 u32 pos = in_next - c->in_buffer;
1244 if (unlikely(c->in_nbytes - (pos + count) <= NBYTES_HASHED_FOR_DELTA + 1))
1245 return;
1246 do {
1247 /* Update the hash table for each power. */
1248 for (u32 power = 0; power < NUM_POWERS_TO_CONSIDER; power++) {
1249 const u32 span = (u32)1 << power;
1250 if (unlikely(pos < span))
1251 continue;
1252 const u32 next_hash = lzms_delta_hash(in_next + 1, span);
1253 const u32 hash = c->next_delta_hashes[power];
1254 c->delta_hash_table[hash] =
1255 (power << DELTA_SOURCE_POWER_SHIFT) | pos;
1256 c->next_delta_hashes[power] = next_hash;
1257 prefetch(&c->delta_hash_table[next_hash]);
1259 } while (in_next++, pos++, --count);
1263 * Skip the next @count bytes (don't search for matches at them). @in_next
1264 * points to the first byte to skip. The return value is @in_next + count.
1267 lzms_skip_bytes(struct lzms_compressor *c, u32 count, const u8 *in_next)
1269 lcpit_matchfinder_skip_bytes(&c->mf, count);
1270 if (c->use_delta_matches)
1271 lzms_delta_matchfinder_skip_bytes(c, in_next, count);
1272 return in_next + count;
1275 /******************************************************************************
1276 * "Near-optimal" parsing *
1277 ******************************************************************************/
1280 * The main near-optimal parsing routine.
1282 * Briefly, the algorithm does an approximate minimum-cost path search to find a
1283 * "near-optimal" sequence of matches and literals to output, based on the
1284 * current cost model. The algorithm steps forward, position by position (byte
1285 * by byte), and updates the minimum cost path to reach each later position that
1286 * can be reached using a match or literal from the current position. This is
1287 * essentially Dijkstra's algorithm in disguise: the graph nodes are positions,
1288 * the graph edges are possible matches/literals to code, and the cost of each
1289 * edge is the estimated number of bits that will be required to output the
1290 * corresponding match or literal. But one difference is that we actually
1291 * compute the lowest-cost path in pieces, where each piece is terminated when
1292 * there are no choices to be made.
1294 * The costs of literals and matches are estimated using the range encoder
1295 * states and the semi-adaptive Huffman codes. Except for range encoding
1296 * states, costs are assumed to be constant throughout a single run of the
1297 * parsing algorithm, which can parse up to NUM_OPTIM_NODES bytes of data. This
1298 * introduces a source of non-optimality because the probabilities and Huffman
1299 * codes can change over this part of the data. And of course, there are
1300 * various other reasons why the result isn't optimal in terms of compression
1301 * ratio. */
1303 static void
1304 lzms_near_optimal_parse(struct lzms_compressor *c)
1306 const u8 *in_next = c->in_buffer;
1307 const u8 * const in_end = &c->in_buffer[c->in_nbytes];
1308 struct lzms_optimum_node *cur_node;
1309 struct lzms_optimum_node *end_node;
1311 /* Set initial length costs for lengths <= MAX_FAST_LENGTH. */
1312 lzms_update_fast_length_costs(c);
1314 /* Set up the initial adaptive state. */
1315 lzms_init_adaptive_state(&c->optimum_nodes[0].state);
1317 begin:
1318 /* Start building a new list of items, which will correspond to the next
1319 * piece of the overall minimum-cost path. */
1321 cur_node = c->optimum_nodes;
1322 cur_node->cost = 0;
1323 end_node = cur_node;
1325 if (in_next == in_end)
1326 return;
1328 /* The following loop runs once for each byte in the input buffer,
1329 * except in a few shortcut cases. */
1330 do {
1331 u32 num_matches;
1333 /* Repeat offset LZ matches */
1334 if (likely(in_next - c->in_buffer >= LZMS_NUM_LZ_REPS &&
1335 in_end - in_next >= 2))
1337 for (int rep_idx = 0; rep_idx < LZMS_NUM_LZ_REPS; rep_idx++) {
1339 /* Looking for a repeat offset LZ match at queue
1340 * index @rep_idx */
1342 const u32 offset = cur_node->state.recent_lz_offsets[rep_idx];
1343 const u8 * const matchptr = in_next - offset;
1345 /* Check the first 2 bytes before entering the extension loop. */
1346 if (load_u16_unaligned(in_next) != load_u16_unaligned(matchptr))
1347 continue;
1349 /* Extend the match to its full length. */
1350 const u32 rep_len = lz_extend(in_next, matchptr, 2, in_end - in_next);
1352 /* Early out for long repeat offset LZ match */
1353 if (rep_len >= c->mf.nice_match_len) {
1355 in_next = lzms_skip_bytes(c, rep_len, in_next);
1357 lzms_encode_item_list(c, cur_node);
1358 lzms_encode_item(c, rep_len, rep_idx);
1360 c->optimum_nodes[0].state = cur_node->state;
1361 cur_node = &c->optimum_nodes[0];
1363 cur_node->state.upcoming_lz_offset =
1364 cur_node->state.recent_lz_offsets[rep_idx];
1365 cur_node->state.upcoming_delta_pair = 0;
1366 for (int i = rep_idx; i < LZMS_NUM_LZ_REPS; i++)
1367 cur_node->state.recent_lz_offsets[i] =
1368 cur_node->state.recent_lz_offsets[i + 1];
1369 lzms_update_lru_queues(&cur_node->state);
1370 lzms_update_main_state(&cur_node->state, 1);
1371 lzms_update_match_state(&cur_node->state, 0);
1372 lzms_update_lz_state(&cur_node->state, 1);
1373 lzms_update_lz_rep_states(&cur_node->state, rep_idx);
1374 goto begin;
1375 }
1377 while (end_node < cur_node + rep_len)
1378 (++end_node)->cost = INFINITE_COST;
1380 u32 base_cost = cur_node->cost +
1381 lzms_bit_1_cost(cur_node->state.main_state,
1382 c->main_probs) +
1383 lzms_bit_0_cost(cur_node->state.match_state,
1384 c->match_probs) +
1385 lzms_bit_1_cost(cur_node->state.lz_state,
1386 c->lz_probs);
1388 for (int i = 0; i < rep_idx; i++)
1389 base_cost += lzms_bit_1_cost(cur_node->state.lz_rep_states[i],
1390 c->lz_rep_probs[i]);
1392 if (rep_idx < LZMS_NUM_LZ_REP_DECISIONS)
1393 base_cost += lzms_bit_0_cost(cur_node->state.lz_rep_states[rep_idx],
1394 c->lz_rep_probs[rep_idx]);
1396 u32 len = 2;
1397 do {
1398 u32 cost = base_cost + lzms_fast_length_cost(c, len);
1399 if (cost < (cur_node + len)->cost) {
1400 (cur_node + len)->cost = cost;
1401 (cur_node + len)->item = (struct lzms_item) {
1402 .length = len,
1403 .source = rep_idx,
1404 };
1405 (cur_node + len)->num_extra_items = 0;
1406 }
1407 } while (++len <= rep_len);
1410 /* try LZ-rep + lit + LZ-rep0 */
1411 if (c->try_lzrep_lit_lzrep0 &&
1412 in_end - (in_next + rep_len) >= 3 &&
1413 load_u16_unaligned(in_next + rep_len + 1) ==
1414 load_u16_unaligned(matchptr + rep_len + 1))
1416 const u32 rep0_len = lz_extend(in_next + rep_len + 1,
1417 matchptr + rep_len + 1,
1418 2,
1419 min(c->mf.nice_match_len,
1420 in_end - (in_next + rep_len + 1)));
1422 unsigned main_state = cur_node->state.main_state;
1423 unsigned match_state = cur_node->state.match_state;
1424 unsigned lz_state = cur_node->state.lz_state;
1425 unsigned lz_rep0_state = cur_node->state.lz_rep_states[0];
1427 /* update states for LZ-rep */
1428 main_state = ((main_state << 1) | 1) % LZMS_NUM_MAIN_PROBS;
1429 match_state = ((match_state << 1) | 0) % LZMS_NUM_MATCH_PROBS;
1430 lz_state = ((lz_state << 1) | 1) % LZMS_NUM_LZ_PROBS;
1431 lz_rep0_state = ((lz_rep0_state << 1) | (rep_idx > 0)) %
1432 LZMS_NUM_LZ_REP_PROBS;
1435 u32 cost = base_cost + lzms_fast_length_cost(c, rep_len);
1437 /* add literal cost */
1438 cost += lzms_literal_cost(c, main_state, *(in_next + rep_len));
1440 /* update state for literal */
1441 main_state = ((main_state << 1) | 0) % LZMS_NUM_MAIN_PROBS;
1443 /* add LZ-rep0 cost */
1444 cost += lzms_bit_1_cost(main_state, c->main_probs) +
1445 lzms_bit_0_cost(match_state, c->match_probs) +
1446 lzms_bit_1_cost(lz_state, c->lz_probs) +
1447 lzms_bit_0_cost(lz_rep0_state, c->lz_rep_probs[0]) +
1448 lzms_fast_length_cost(c, rep0_len);
1450 const u32 total_len = rep_len + 1 + rep0_len;
1452 while (end_node < cur_node + total_len)
1453 (++end_node)->cost = INFINITE_COST;
1455 if (cost < (cur_node + total_len)->cost) {
1456 (cur_node + total_len)->cost = cost;
1457 (cur_node + total_len)->item = (struct lzms_item) {
1458 .length = rep0_len,
1459 .source = 0,
1460 };
1461 (cur_node + total_len)->extra_items[0] = (struct lzms_item) {
1462 .length = 1,
1463 .source = *(in_next + rep_len),
1464 };
1465 (cur_node + total_len)->extra_items[1] = (struct lzms_item) {
1466 .length = rep_len,
1467 .source = rep_idx,
1468 };
1469 (cur_node + total_len)->num_extra_items = 2;
1475 /* Repeat offset delta matches */
1476 if (c->use_delta_matches &&
1477 likely(in_next - c->in_buffer >= LZMS_NUM_DELTA_REPS + 1 &&
1478 (in_end - in_next >= 2)))
1480 for (int rep_idx = 0; rep_idx < LZMS_NUM_DELTA_REPS; rep_idx++) {
1482 /* Looking for a repeat offset delta match at
1483 * queue index @rep_idx */
1485 const u32 pair = cur_node->state.recent_delta_pairs[rep_idx];
1486 const u32 power = pair >> DELTA_SOURCE_POWER_SHIFT;
1487 const u32 raw_offset = pair & DELTA_SOURCE_RAW_OFFSET_MASK;
1488 const u32 span = (u32)1 << power;
1489 const u32 offset = raw_offset << power;
1490 const u8 * const matchptr = in_next - offset;
1492 /* Check the first 2 bytes before entering the
1493 * extension loop. */
1494 if (((u8)(*(in_next + 0) - *(in_next + 0 - span)) !=
1495 (u8)(*(matchptr + 0) - *(matchptr + 0 - span))) ||
1496 ((u8)(*(in_next + 1) - *(in_next + 1 - span)) !=
1497 (u8)(*(matchptr + 1) - *(matchptr + 1 - span))))
1498 continue;
1500 /* Extend the match to its full length. */
1501 const u32 rep_len = lzms_extend_delta_match(in_next, matchptr,
1502 2, in_end - in_next,
1503 span);
1505 /* Early out for long repeat offset delta match */
1506 if (rep_len >= c->mf.nice_match_len) {
1508 in_next = lzms_skip_bytes(c, rep_len, in_next);
1510 lzms_encode_item_list(c, cur_node);
1511 lzms_encode_item(c, rep_len, DELTA_SOURCE_TAG | rep_idx);
1513 c->optimum_nodes[0].state = cur_node->state;
1514 cur_node = &c->optimum_nodes[0];
1516 cur_node->state.upcoming_delta_pair = pair;
1517 cur_node->state.upcoming_lz_offset = 0;
1518 for (int i = rep_idx; i < LZMS_NUM_DELTA_REPS; i++)
1519 cur_node->state.recent_delta_pairs[i] =
1520 cur_node->state.recent_delta_pairs[i + 1];
1521 lzms_update_lru_queues(&cur_node->state);
1522 lzms_update_main_state(&cur_node->state, 1);
1523 lzms_update_match_state(&cur_node->state, 1);
1524 lzms_update_delta_state(&cur_node->state, 1);
1525 lzms_update_delta_rep_states(&cur_node->state, rep_idx);
1526 goto begin;
1527 }
1529 while (end_node < cur_node + rep_len)
1530 (++end_node)->cost = INFINITE_COST;
1532 u32 base_cost = cur_node->cost +
1533 lzms_bit_1_cost(cur_node->state.main_state,
1534 c->main_probs) +
1535 lzms_bit_1_cost(cur_node->state.match_state,
1536 c->match_probs) +
1537 lzms_bit_1_cost(cur_node->state.delta_state,
1538 c->delta_probs);
1540 for (int i = 0; i < rep_idx; i++)
1541 base_cost += lzms_bit_1_cost(cur_node->state.delta_rep_states[i],
1542 c->delta_rep_probs[i]);
1544 if (rep_idx < LZMS_NUM_DELTA_REP_DECISIONS)
1545 base_cost += lzms_bit_0_cost(cur_node->state.delta_rep_states[rep_idx],
1546 c->delta_rep_probs[rep_idx]);
1548 u32 len = 2;
1549 do {
1550 u32 cost = base_cost + lzms_fast_length_cost(c, len);
1551 if (cost < (cur_node + len)->cost) {
1552 (cur_node + len)->cost = cost;
1553 (cur_node + len)->item = (struct lzms_item) {
1554 .length = len,
1555 .source = DELTA_SOURCE_TAG | rep_idx,
1556 };
1557 (cur_node + len)->num_extra_items = 0;
1559 } while (++len <= rep_len);
1563 /* Explicit offset LZ matches */
1564 num_matches = lcpit_matchfinder_get_matches(&c->mf, c->matches);
1565 if (num_matches) {
1567 u32 best_len = c->matches[0].length;
1569 /* Early out for long explicit offset LZ match */
1570 if (best_len >= c->mf.nice_match_len) {
1572 const u32 offset = c->matches[0].offset;
1574 /* Extend the match as far as possible.
1575 * This is necessary because the LCP-interval
1576 * tree matchfinder only reports up to
1577 * nice_match_len bytes. */
1578 best_len = lz_extend(in_next, in_next - offset,
1579 best_len, in_end - in_next);
1581 in_next = lzms_skip_bytes(c, best_len - 1, in_next + 1);
1583 lzms_encode_item_list(c, cur_node);
1584 lzms_encode_item(c, best_len, offset + LZMS_NUM_LZ_REPS - 1);
1586 c->optimum_nodes[0].state = cur_node->state;
1587 cur_node = &c->optimum_nodes[0];
1589 cur_node->state.upcoming_lz_offset = offset;
1590 cur_node->state.upcoming_delta_pair = 0;
1591 lzms_update_lru_queues(&cur_node->state);
1592 lzms_update_main_state(&cur_node->state, 1);
1593 lzms_update_match_state(&cur_node->state, 0);
1594 lzms_update_lz_state(&cur_node->state, 0);
1595 goto begin;
1596 }
1598 while (end_node < cur_node + best_len)
1599 (++end_node)->cost = INFINITE_COST;
1601 u32 base_cost = cur_node->cost +
1602 lzms_bit_1_cost(cur_node->state.main_state,
1603 c->main_probs) +
1604 lzms_bit_0_cost(cur_node->state.match_state,
1605 c->match_probs) +
1606 lzms_bit_0_cost(cur_node->state.lz_state,
1607 c->lz_probs);
1609 if (c->try_lzmatch_lit_lzrep0 &&
1610 likely(in_end - (in_next + c->matches[0].length) >= 3))
1612 /* try LZ-match + lit + LZ-rep0 */
1614 u32 l = 2;
1615 u32 i = num_matches - 1;
1616 do {
1617 const u32 len = c->matches[i].length;
1618 const u32 offset = c->matches[i].offset;
1619 const u32 position_cost = base_cost +
1620 lzms_lz_offset_cost(c, offset);
1621 do {
1622 u32 cost = position_cost + lzms_fast_length_cost(c, l);
1623 if (cost < (cur_node + l)->cost) {
1624 (cur_node + l)->cost = cost;
1625 (cur_node + l)->item = (struct lzms_item) {
1626 .length = l,
1627 .source = offset + (LZMS_NUM_LZ_REPS - 1),
1628 };
1629 (cur_node + l)->num_extra_items = 0;
1631 } while (++l <= len);
1633 const u8 * const matchptr = in_next - offset;
1634 if (load_u16_unaligned(matchptr + len + 1) !=
1635 load_u16_unaligned(in_next + len + 1))
1636 continue;
1638 const u32 rep0_len = lz_extend(in_next + len + 1,
1639 matchptr + len + 1,
1640 2,
1641 min(c->mf.nice_match_len,
1642 in_end - (in_next + len + 1)));
1644 unsigned main_state = cur_node->state.main_state;
1645 unsigned match_state = cur_node->state.match_state;
1646 unsigned lz_state = cur_node->state.lz_state;
1648 /* update states for LZ-match */
1649 main_state = ((main_state << 1) | 1) % LZMS_NUM_MAIN_PROBS;
1650 match_state = ((match_state << 1) | 0) % LZMS_NUM_MATCH_PROBS;
1651 lz_state = ((lz_state << 1) | 0) % LZMS_NUM_LZ_PROBS;
1654 u32 cost = position_cost + lzms_fast_length_cost(c, len);
1656 /* add literal cost */
1657 cost += lzms_literal_cost(c, main_state, *(in_next + len));
1659 /* update state for literal */
1660 main_state = ((main_state << 1) | 0) % LZMS_NUM_MAIN_PROBS;
1662 /* add LZ-rep0 cost */
1663 cost += lzms_bit_1_cost(main_state, c->main_probs) +
1664 lzms_bit_0_cost(match_state, c->match_probs) +
1665 lzms_bit_1_cost(lz_state, c->lz_probs) +
1666 lzms_bit_0_cost(cur_node->state.lz_rep_states[0],
1667 c->lz_rep_probs[0]) +
1668 lzms_fast_length_cost(c, rep0_len);
1670 const u32 total_len = len + 1 + rep0_len;
1672 while (end_node < cur_node + total_len)
1673 (++end_node)->cost = INFINITE_COST;
1675 if (cost < (cur_node + total_len)->cost) {
1676 (cur_node + total_len)->cost = cost;
1677 (cur_node + total_len)->item = (struct lzms_item) {
1678 .length = rep0_len,
1679 .source = 0,
1680 };
1681 (cur_node + total_len)->extra_items[0] = (struct lzms_item) {
1682 .length = 1,
1683 .source = *(in_next + len),
1684 };
1685 (cur_node + total_len)->extra_items[1] = (struct lzms_item) {
1686 .length = len,
1687 .source = offset + (LZMS_NUM_LZ_REPS - 1),
1688 };
1689 (cur_node + total_len)->num_extra_items = 2;
1690 }
1691 } while (i--);
1692 } else {
1693 u32 l = 2;
1694 u32 i = num_matches - 1;
1695 do {
1696 u32 position_cost = base_cost +
1697 lzms_lz_offset_cost(c, c->matches[i].offset);
1698 do {
1699 u32 cost = position_cost + lzms_fast_length_cost(c, l);
1700 if (cost < (cur_node + l)->cost) {
1701 (cur_node + l)->cost = cost;
1702 (cur_node + l)->item = (struct lzms_item) {
1703 .length = l,
1704 .source = c->matches[i].offset +
1705 (LZMS_NUM_LZ_REPS - 1),
1706 };
1707 (cur_node + l)->num_extra_items = 0;
1709 } while (++l <= c->matches[i].length);
1710 } while (i--);
1711 }
1712 }
1714 /* Explicit offset delta matches */
1715 if (c->use_delta_matches &&
1716 likely(in_end - in_next >= NBYTES_HASHED_FOR_DELTA + 1))
1718 const u32 pos = in_next - c->in_buffer;
1720 /* Consider each possible power (log2 of span) */
1721 BUILD_BUG_ON(NUM_POWERS_TO_CONSIDER > LZMS_NUM_DELTA_POWER_SYMS);
1722 for (u32 power = 0; power < NUM_POWERS_TO_CONSIDER; power++) {
1724 const u32 span = (u32)1 << power;
1726 if (unlikely(pos < span))
1727 continue;
1729 const u32 next_hash = lzms_delta_hash(in_next + 1, span);
1730 const u32 hash = c->next_delta_hashes[power];
1731 const u32 cur_match = c->delta_hash_table[hash];
1733 c->delta_hash_table[hash] = (power << DELTA_SOURCE_POWER_SHIFT) | pos;
1734 c->next_delta_hashes[power] = next_hash;
1735 prefetch(&c->delta_hash_table[next_hash]);
1737 if (power != cur_match >> DELTA_SOURCE_POWER_SHIFT)
1738 continue;
1740 const u32 offset = pos - (cur_match & DELTA_SOURCE_RAW_OFFSET_MASK);
1742 /* The offset must be a multiple of span. */
1743 if (offset & (span - 1))
1744 continue;
1746 const u8 * const matchptr = in_next - offset;
1748 /* Check the first 3 bytes before entering the
1749 * extension loop. */
1750 BUILD_BUG_ON(NBYTES_HASHED_FOR_DELTA != 3);
1751 if (((u8)(*(in_next + 0) - *(in_next + 0 - span)) !=
1752 (u8)(*(matchptr + 0) - *(matchptr + 0 - span))) ||
1753 ((u8)(*(in_next + 1) - *(in_next + 1 - span)) !=
1754 (u8)(*(matchptr + 1) - *(matchptr + 1 - span))) ||
1755 ((u8)(*(in_next + 2) - *(in_next + 2 - span)) !=
1756 (u8)(*(matchptr + 2) - *(matchptr + 2 - span))))
1757 continue;
1759 /* Extend the delta match to its full length. */
1760 const u32 len = lzms_extend_delta_match(in_next,
1761 matchptr,
1762 NBYTES_HASHED_FOR_DELTA,
1763 in_end - in_next,
1764 span);
1766 const u32 raw_offset = offset >> power;
1767 const u32 pair = (power << DELTA_SOURCE_POWER_SHIFT) |
1768 raw_offset;
1769 const u32 source = DELTA_SOURCE_TAG |
1770 (pair + LZMS_NUM_DELTA_REPS - 1);
1772 /* Early out for long explicit offset delta match */
1773 if (len >= c->mf.nice_match_len) {
1775 in_next = lzms_skip_bytes(c, len - 1, in_next + 1);
1777 lzms_encode_item_list(c, cur_node);
1778 lzms_encode_item(c, len, source);
1780 c->optimum_nodes[0].state = cur_node->state;
1781 cur_node = &c->optimum_nodes[0];
1783 cur_node->state.upcoming_lz_offset = 0;
1784 cur_node->state.upcoming_delta_pair = pair;
1785 lzms_update_lru_queues(&cur_node->state);
1786 lzms_update_main_state(&cur_node->state, 1);
1787 lzms_update_match_state(&cur_node->state, 1);
1788 lzms_update_delta_state(&cur_node->state, 0);
1789 goto begin;
1790 }
1792 while (end_node < cur_node + len)
1793 (++end_node)->cost = INFINITE_COST;
1795 u32 base_cost = cur_node->cost +
1796 lzms_bit_1_cost(cur_node->state.main_state,
1797 c->main_probs) +
1798 lzms_bit_1_cost(cur_node->state.match_state,
1799 c->match_probs) +
1800 lzms_bit_0_cost(cur_node->state.delta_state,
1801 c->delta_probs) +
1802 lzms_delta_source_cost(c, power, raw_offset);
1804 u32 l = NBYTES_HASHED_FOR_DELTA;
1805 do {
1806 u32 cost = base_cost + lzms_fast_length_cost(c, l);
1807 if (cost < (cur_node + l)->cost) {
1808 (cur_node + l)->cost = cost;
1809 (cur_node + l)->item = (struct lzms_item) {
1810 .length = l,
1811 .source = source,
1812 };
1813 (cur_node + l)->num_extra_items = 0;
1815 } while (++l <= len);
1820 if (end_node < cur_node + 1)
1821 (++end_node)->cost = INFINITE_COST;
1822 const u32 cur_and_lit_cost = cur_node->cost +
1823 lzms_literal_cost(c, cur_node->state.main_state,
1824 *in_next);
1825 if (cur_and_lit_cost < (cur_node + 1)->cost) {
1826 (cur_node + 1)->cost = cur_and_lit_cost;
1827 (cur_node + 1)->item = (struct lzms_item) {
1828 .length = 1,
1829 .source = *in_next,
1830 };
1831 (cur_node + 1)->num_extra_items = 0;
1832 } else if (c->try_lit_lzrep0 && in_end - (in_next + 1) >= 2) {
1833 /* try lit + LZ-rep0 */
1834 const u32 offset =
1835 (cur_node->state.prev_lz_offset) ?
1836 cur_node->state.prev_lz_offset :
1837 cur_node->state.recent_lz_offsets[0];
1839 if (load_u16_unaligned(in_next + 1) ==
1840 load_u16_unaligned(in_next + 1 - offset))
1842 const u32 rep0_len = lz_extend(in_next + 1,
1843 in_next + 1 - offset,
1844 2,
1845 min(in_end - (in_next + 1),
1846 c->mf.nice_match_len));
1848 unsigned main_state = cur_node->state.main_state;
1850 /* Update main_state after literal */
1851 main_state = (main_state << 1 | 0) % LZMS_NUM_MAIN_PROBS;
				/* Add cost of LZ-rep0 */
				const u32 cost = cur_and_lit_cost +
						 lzms_bit_1_cost(main_state, c->main_probs) +
						 lzms_bit_0_cost(cur_node->state.match_state,
								 c->match_probs) +
						 lzms_bit_1_cost(cur_node->state.lz_state,
								 c->lz_probs) +
						 lzms_bit_0_cost(cur_node->state.lz_rep_states[0],
								 c->lz_rep_probs[0]) +
						 lzms_fast_length_cost(c, rep0_len);
				const u32 total_len = 1 + rep0_len;

				while (end_node < cur_node + total_len)
					(++end_node)->cost = INFINITE_COST;

				if (cost < (cur_node + total_len)->cost) {
					(cur_node + total_len)->cost = cost;
					(cur_node + total_len)->item = (struct lzms_item) {
						.length = rep0_len,
						.source = 0,
					};
					(cur_node + total_len)->extra_items[0] = (struct lzms_item) {
						.length = 1,
						.source = *in_next,
					};
					(cur_node + total_len)->num_extra_items = 1;
				}
			}
		}
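		/*
		 * Added note: for the lit + LZ-rep0 path above, the node
		 * records the rep0 match as its main item and the preceding
		 * literal in extra_items[0].  The traceback below walks
		 * extra_items so that the items are replayed in
		 * front-to-back order when the adaptive state is finalized.
		 */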
		/* Advance to the next position. */
		cur_node++;
		in_next++;

		/* The lowest-cost path to the current position is now known.
		 * Finalize the adaptive state that results from taking this
		 * lowest-cost path. */
		struct lzms_item item_to_take = cur_node->item;
		struct lzms_optimum_node *source_node = cur_node - (item_to_take.length);
		int next_item_idx = -1;
		for (unsigned i = 0; i < cur_node->num_extra_items; i++) {
			item_to_take = cur_node->extra_items[i];
			source_node -= item_to_take.length;
			next_item_idx++;
		}
		cur_node->state = source_node->state;
		for (;;) {
			const u32 length = item_to_take.length;
			u32 source = item_to_take.source;

			cur_node->state.upcoming_lz_offset = 0;
			cur_node->state.upcoming_delta_pair = 0;
			if (length > 1) {
				/* Match */
				lzms_update_main_state(&cur_node->state, 1);

				if (source & DELTA_SOURCE_TAG) {
					/* Delta match */
					lzms_update_match_state(&cur_node->state, 1);
					source &= ~DELTA_SOURCE_TAG;

					if (source >= LZMS_NUM_DELTA_REPS) {
						/* Explicit offset delta match */
						u32 pair = source - (LZMS_NUM_DELTA_REPS - 1);
						lzms_update_delta_state(&cur_node->state, 0);
						cur_node->state.upcoming_delta_pair = pair;
					} else {
						/* Repeat offset delta match */
						int rep_idx = source;

						lzms_update_delta_state(&cur_node->state, 1);
						lzms_update_delta_rep_states(&cur_node->state, rep_idx);

						cur_node->state.upcoming_delta_pair =
							cur_node->state.recent_delta_pairs[rep_idx];

						for (int i = rep_idx; i < LZMS_NUM_DELTA_REPS; i++)
							cur_node->state.recent_delta_pairs[i] =
								cur_node->state.recent_delta_pairs[i + 1];
					}
				} else {
					/* LZ match */
					lzms_update_match_state(&cur_node->state, 0);
					if (source >= LZMS_NUM_LZ_REPS) {
						/* Explicit offset LZ match */
						lzms_update_lz_state(&cur_node->state, 0);
						cur_node->state.upcoming_lz_offset =
							source - (LZMS_NUM_LZ_REPS - 1);
					} else {
						/* Repeat offset LZ match */
						int rep_idx = source;

						lzms_update_lz_state(&cur_node->state, 1);
						lzms_update_lz_rep_states(&cur_node->state, rep_idx);

						cur_node->state.upcoming_lz_offset =
							cur_node->state.recent_lz_offsets[rep_idx];

						for (int i = rep_idx; i < LZMS_NUM_LZ_REPS; i++)
							cur_node->state.recent_lz_offsets[i] =
								cur_node->state.recent_lz_offsets[i + 1];
					}
				}
			} else {
				/* Literal */
				lzms_update_main_state(&cur_node->state, 0);
			}

			lzms_update_lru_queues(&cur_node->state);
			if (next_item_idx < 0)
				break;
			if (next_item_idx == 0)
				item_to_take = cur_node->item;
			else
				item_to_take = cur_node->extra_items[next_item_idx - 1];
			--next_item_idx;
		}
		/*
		 * This loop will terminate when either of the following
		 * conditions is true:
		 *
		 * (1) cur_node == end_node
		 *
		 *	There are no paths that extend beyond the current
		 *	position.  In this case, any path to a later position
		 *	must pass through the current position, so we can go
		 *	ahead and choose the list of items that led to this
		 *	position.
		 *
		 * (2) cur_node == &c->optimum_nodes[NUM_OPTIM_NODES]
		 *
		 *	This bounds the number of times the algorithm can step
		 *	forward before it is guaranteed to start choosing items.
		 *	This limits the memory usage.  It also guarantees that
		 *	the parser will not go too long without updating the
		 *	probability tables.
		 *
		 * Note: no check for end-of-buffer is needed because
		 * end-of-buffer will trigger condition (1).
		 */
		if (cur_node == end_node ||
		    cur_node == &c->optimum_nodes[NUM_OPTIM_NODES])
		{
			lzms_encode_nonempty_item_list(c, cur_node);
			c->optimum_nodes[0].state = cur_node->state;
			goto begin;
		}
	}
}

static void
lzms_init_states_and_probabilities(struct lzms_compressor *c)
{
	c->main_state = 0;
	c->match_state = 0;
	c->lz_state = 0;
	for (int i = 0; i < LZMS_NUM_LZ_REP_DECISIONS; i++)
		c->lz_rep_states[i] = 0;
	c->delta_state = 0;
	for (int i = 0; i < LZMS_NUM_DELTA_REP_DECISIONS; i++)
		c->delta_rep_states[i] = 0;

	lzms_init_probability_entries(c->main_probs, LZMS_NUM_MAIN_PROBS);
	lzms_init_probability_entries(c->match_probs, LZMS_NUM_MATCH_PROBS);
	lzms_init_probability_entries(c->lz_probs, LZMS_NUM_LZ_PROBS);
	for (int i = 0; i < LZMS_NUM_LZ_REP_DECISIONS; i++)
		lzms_init_probability_entries(c->lz_rep_probs[i], LZMS_NUM_LZ_REP_PROBS);
	lzms_init_probability_entries(c->delta_probs, LZMS_NUM_DELTA_PROBS);
	for (int i = 0; i < LZMS_NUM_DELTA_REP_DECISIONS; i++)
		lzms_init_probability_entries(c->delta_rep_probs[i], LZMS_NUM_DELTA_REP_PROBS);
}
static void
lzms_init_huffman_codes(struct lzms_compressor *c, unsigned num_offset_slots)
{
	lzms_init_huffman_code(&c->literal_rebuild_info,
			       LZMS_NUM_LITERAL_SYMS,
			       LZMS_LITERAL_CODE_REBUILD_FREQ,
			       c->literal_codewords,
			       c->literal_lens,
			       c->literal_freqs);

	lzms_init_huffman_code(&c->lz_offset_rebuild_info,
			       num_offset_slots,
			       LZMS_LZ_OFFSET_CODE_REBUILD_FREQ,
			       c->lz_offset_codewords,
			       c->lz_offset_lens,
			       c->lz_offset_freqs);
	lzms_init_huffman_code(&c->length_rebuild_info,
			       LZMS_NUM_LENGTH_SYMS,
			       LZMS_LENGTH_CODE_REBUILD_FREQ,
			       c->length_codewords,
			       c->length_lens,
			       c->length_freqs);

	lzms_init_huffman_code(&c->delta_offset_rebuild_info,
			       num_offset_slots,
			       LZMS_DELTA_OFFSET_CODE_REBUILD_FREQ,
			       c->delta_offset_codewords,
			       c->delta_offset_lens,
			       c->delta_offset_freqs);

	lzms_init_huffman_code(&c->delta_power_rebuild_info,
			       LZMS_NUM_DELTA_POWER_SYMS,
			       LZMS_DELTA_POWER_CODE_REBUILD_FREQ,
			       c->delta_power_codewords,
			       c->delta_power_lens,
			       c->delta_power_freqs);
}
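/*
 * Added note: LZMS's Huffman codes are adaptive.  Each lzms_init_huffman_code()
 * call above pairs a symbol frequency array with a rebuild frequency; the code
 * is periodically rebuilt from the accumulated frequencies after the given
 * number of symbols has been encoded.  Because the decoder mirrors the same
 * frequency updates, both sides stay synchronized without the encoder ever
 * transmitting code lengths explicitly.
 */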
/*
 * Flush the output streams, prepare the final compressed data, and return its
 * size in bytes.
 *
 * A return value of 0 indicates that the data could not be compressed to fit in
 * the available space.
 */
static size_t
lzms_finalize(struct lzms_compressor *c)
{
	size_t num_forwards_units;
	size_t num_backwards_units;

	/* Flush both the forwards and backwards streams, and make sure they
	 * didn't cross each other and start overwriting each other's data. */
	if (!lzms_output_bitstream_flush(&c->os))
		return 0;

	if (!lzms_range_encoder_flush(&c->rc))
		return 0;

	if (c->rc.next > c->os.next)
		return 0;
	/* Now the compressed buffer contains the data output by the forwards
	 * bitstream, then empty space, then data output by the backwards
	 * bitstream.  Move the data output by the backwards bitstream to be
	 * adjacent to the data output by the forwards bitstream, and calculate
	 * the compressed size that this results in. */
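	/*
	 * Illustrative layout (added), with F = forwards (range encoder)
	 * units and B = backwards (bitstream) units in a 10-unit buffer:
	 *
	 *	before memmove:  F F F F . . . B B B
	 *	after memmove:   F F F F B B B
	 *
	 * giving a compressed size of 7 units * sizeof(le16) == 14 bytes.
	 */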
	num_forwards_units = c->rc.next - c->rc.begin;
	num_backwards_units = c->rc.end - c->os.next;

	memmove(c->rc.next, c->os.next, num_backwards_units * sizeof(le16));

	return (num_forwards_units + num_backwards_units) * sizeof(le16);
}
static u64
lzms_get_needed_memory(size_t max_bufsize, unsigned compression_level,
		       bool destructive)
{
	u64 size = 0;

	if (max_bufsize > LZMS_MAX_BUFFER_SIZE)
		return 0;

	size += sizeof(struct lzms_compressor);

	if (!destructive)
		size += max_bufsize; /* in_buffer */

	size += lcpit_matchfinder_get_needed_memory(max_bufsize);

	return size;
}
static int
lzms_create_compressor(size_t max_bufsize, unsigned compression_level,
		       bool destructive, void **c_ret)
{
	struct lzms_compressor *c;
	u32 nice_match_len;

	if (max_bufsize > LZMS_MAX_BUFFER_SIZE)
		return WIMLIB_ERR_INVALID_PARAM;

	c = ALIGNED_MALLOC(sizeof(struct lzms_compressor), 64);
	if (!c)
		goto oom0;

	c->destructive = destructive;
	/* Scale nice_match_len with the compression level.  But to allow an
	 * optimization for length cost calculations, don't allow nice_match_len
	 * to exceed MAX_FAST_LENGTH. */
	nice_match_len = min(((u64)compression_level * 63) / 50, MAX_FAST_LENGTH);
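	/* Worked example (added): at level 50 this gives (50 * 63) / 50 = 63;
	 * at level 100, (100 * 63) / 50 = 126; and from level 203 upward,
	 * (203 * 63) / 50 = 255 hits the MAX_FAST_LENGTH cap. */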
	c->use_delta_matches = (compression_level >= 35);
	c->try_lzmatch_lit_lzrep0 = (compression_level >= 45);
	c->try_lit_lzrep0 = (compression_level >= 60);
	c->try_lzrep_lit_lzrep0 = (compression_level >= 60);
	if (!c->destructive) {
		c->in_buffer = MALLOC(max_bufsize);
		if (!c->in_buffer)
			goto oom1;
	}

	if (!lcpit_matchfinder_init(&c->mf, max_bufsize, 2, nice_match_len))
		goto oom2;

	lzms_init_fast_length_slot_tab(c);
	lzms_init_offset_slot_tabs(c);

	*c_ret = c;
	return 0;

oom2:
	if (!c->destructive)
		FREE(c->in_buffer);
oom1:
	ALIGNED_FREE(c);
oom0:
	return WIMLIB_ERR_NOMEM;
}
static size_t
lzms_compress(const void *in, size_t in_nbytes,
	      void *out, size_t out_nbytes_avail, void *_c)
{
	struct lzms_compressor *c = _c;
	size_t result;

	/* Don't bother trying to compress extremely small inputs. */
	if (in_nbytes < 4)
		return 0;

	/* Copy the input data into the internal buffer and preprocess it. */
	if (c->destructive)
		c->in_buffer = (void *)in;
	else
		memcpy(c->in_buffer, in, in_nbytes);
	c->in_nbytes = in_nbytes;
	lzms_x86_filter(c->in_buffer, in_nbytes, c->last_target_usages, false);
	/* Prepare the matchfinders. */
	lcpit_matchfinder_load_buffer(&c->mf, c->in_buffer, c->in_nbytes);
	if (c->use_delta_matches)
		lzms_init_delta_matchfinder(c);

	/* Initialize the encoder structures. */
	lzms_range_encoder_init(&c->rc, out, out_nbytes_avail / sizeof(le16));
	lzms_output_bitstream_init(&c->os, out, out_nbytes_avail / sizeof(le16));
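	/* Added note: both encoders share the one output buffer, measured in
	 * 16-bit units.  The range encoder writes forwards from the start
	 * while the bitstream writes backwards from the end; lzms_finalize()
	 * later verifies that the two ends never crossed and then splices
	 * them together. */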
	lzms_init_states_and_probabilities(c);
	lzms_init_huffman_codes(c, lzms_get_num_offset_slots(c->in_nbytes));
	/* The main loop: parse and encode. */
	lzms_near_optimal_parse(c);

	/* Return the compressed data size or 0. */
	result = lzms_finalize(c);
	if (!result && c->destructive)
		lzms_x86_filter(c->in_buffer, c->in_nbytes, c->last_target_usages, true);
	return result;
}
static void
lzms_free_compressor(void *_c)
{
	struct lzms_compressor *c = _c;

	if (!c->destructive)
		FREE(c->in_buffer);
	lcpit_matchfinder_destroy(&c->mf);
	ALIGNED_FREE(c);
}
const struct compressor_ops lzms_compressor_ops = {
	.get_needed_memory = lzms_get_needed_memory,
	.create_compressor = lzms_create_compressor,
	.compress = lzms_compress,
	.free_compressor = lzms_free_compressor,
};