* LZX_CACHE_PER_POS is the number of lz_match structures to reserve in the
* match cache for each byte position. This value should be high enough so that
* nearly all the time, all matches found in a given block can fit in the match
- * cache. However, fallback behavior on cache overflow is still required.
+ * cache. However, fallback behavior (immediately terminating the block) on
+ * cache overflow is still required.
*/
-#define LZX_CACHE_PER_POS 6
+#define LZX_CACHE_PER_POS 7
-#define LZX_CACHE_LEN (LZX_DIV_BLOCK_SIZE * (LZX_CACHE_PER_POS + 1))
+/*
+ * LZX_CACHE_LENGTH is the number of lz_match structures in the match cache,
+ * excluding the extra "overflow" entries. The per-position multiplier is '1 +
+ * LZX_CACHE_PER_POS' instead of 'LZX_CACHE_PER_POS' because there is an
+ * overhead of one lz_match per position, used to hold the match count at that
+ * position.
+ */
+#define LZX_CACHE_LENGTH (LZX_DIV_BLOCK_SIZE * (1 + LZX_CACHE_PER_POS))
+/*
+ * LZX_MAX_MATCHES_PER_POS is an upper bound on the number of matches that can
+ * ever be saved in the match cache for a single position. Since each match we
+ * save for a single position has a distinct length, we can use the number of
+ * possible match lengths in LZX as this bound. This bound is guaranteed to be
+ * valid in all cases, although if 'nice_match_length < LZX_MAX_MATCH_LEN', then
+ * it will never actually be reached.
+ */
#define LZX_MAX_MATCHES_PER_POS LZX_NUM_LENS
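+/*
+ * Illustrative arithmetic only (not normative): assuming
+ * LZX_DIV_BLOCK_SIZE == 32768 and LZX_NUM_LENS == 256, their values
+ * elsewhere in this codebase, the definitions above give:
+ *
+ *     LZX_CACHE_LENGTH        = 32768 * (1 + 7) = 262144 lz_match entries
+ *     LZX_MAX_MATCHES_PER_POS = 256
+ */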
/*
#define LZX_CONSIDER_ALIGNED_COSTS 0
/*
- * The maximum compression level at which we use the faster algorithm.
+ * LZX_MAX_FAST_LEVEL is the maximum compression level at which we use the
+ * faster algorithm.
*/
#define LZX_MAX_FAST_LEVEL 34
#include "wimlib/bt_matchfinder.h"
#include "wimlib/compress_common.h"
#include "wimlib/compressor_ops.h"
-#include "wimlib/endianness.h"
#include "wimlib/error.h"
#include "wimlib/hc_matchfinder.h"
#include "wimlib/lz_extend.h"
struct lzx_codes codes[2];
unsigned codes_index;
- /* The match/literal sequence the algorithm chose for the current block.
+ /*
+ * The match/literal sequence the algorithm chose for the current block.
+ *
+ * Notes on how large this array actually needs to be:
+ *
+ * - In lzx_compress_near_optimal(), the maximum block size is
+ * 'LZX_DIV_BLOCK_SIZE + LZX_MAX_MATCH_LEN - 1' bytes. This occurs if
+ * a match of the maximum length is found at the last position before
+ * the block would otherwise be split. Although
+ * it is impossible for this particular case to actually result in a
+ * parse of all literals, we reserve this many spaces anyway.
+ *
+ * - The worst case for lzx_compress_lazy() is a block of almost all
+ * literals that ends with a series of matches of increasing scores,
+ * causing a sequence of literals to be chosen before the last match
+ * is finally chosen. The number of items actually chosen in this
+ * scenario is limited by the number of distinct match scores that
+ * exist for matches shorter than 'nice_match_length'. Having
+ * 'LZX_MAX_MATCH_LEN - 1' extra spaces is plenty for now.
*/
- struct lzx_item chosen_items[LZX_DIV_BLOCK_SIZE + LZX_MAX_MATCH_LEN + 1];
+ struct lzx_item chosen_items[LZX_DIV_BLOCK_SIZE + LZX_MAX_MATCH_LEN - 1];
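+ /*
+ * (Illustrative sizing: assuming LZX_DIV_BLOCK_SIZE == 32768 and
+ * LZX_MAX_MATCH_LEN == 257, their values elsewhere in this codebase,
+ * this reserves 32768 + 257 - 1 = 33024 entries, covering both worst
+ * cases described above.)
+ */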
/* Table mapping match offset => offset slot for small offsets */
#define LZX_NUM_FAST_OFFSETS 32768
/* Data for near-optimal parsing */
struct {
- /* The graph nodes for the current block */
+ /*
+ * The graph nodes for the current block.
+ *
+ * We need at least 'LZX_DIV_BLOCK_SIZE +
+ * LZX_MAX_MATCH_LEN - 1' nodes because that is the
+ * maximum block size that may be used. Add 1 because
+ * we need a node to represent end-of-block.
+ *
+ * It is possible that nodes past end-of-block are
+ * accessed during match consideration, but this can
+ * only occur if the block was truncated at
+ * LZX_DIV_BLOCK_SIZE. So the same bound still applies.
+ * Note that since nodes past the end of the block will
+ * never actually have an effect on the items that are
+ * chosen for the block, it makes no difference what
+ * their costs are initialized to (if anything).
+ */
struct lzx_optimum_node optimum_nodes[LZX_DIV_BLOCK_SIZE +
- LZX_MAX_MATCH_LEN + 1];
+ LZX_MAX_MATCH_LEN - 1 + 1];
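+			/*
+			 * (Illustrative indexing, following the description
+			 * above: optimum_nodes[i] describes the best parse
+			 * found so far for the first 'i' bytes of the block,
+			 * so the end-of-block node is
+			 * optimum_nodes[block_size] and the highest index
+			 * ever needed is LZX_DIV_BLOCK_SIZE +
+			 * LZX_MAX_MATCH_LEN - 1.)
+			 */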
/* The cost model for the current block */
struct lzx_costs costs;
- /* Cached matches for the current block */
- struct lz_match match_cache[LZX_CACHE_LEN + 1 +
- LZX_MAX_MATCHES_PER_POS];
- struct lz_match *cache_overflow_mark;
+ /*
+ * Cached matches for the current block. This array
+ * contains the matches that were found at each position
+ * in the block. Specifically, for each position, there
+ * is a special 'struct lz_match' whose 'length' field
+ * contains the number of matches that were found at
+ * that position; this is followed by the matches
+ * themselves, if any, sorted by strictly increasing
+ * length.
+ *
+ * Note: in rare cases, there will be a very high number
+ * of matches in the block and this array will overflow.
+ * If this happens, we force the end of the current
+ * block. LZX_CACHE_LENGTH is the length at which we
+ * actually check for overflow. The extra slots beyond
+ * this are enough to absorb the worst case overflow,
+ * which occurs if starting at
+ * &match_cache[LZX_CACHE_LENGTH - 1], we write the
+ * match count header, then write
+ * LZX_MAX_MATCHES_PER_POS matches, then skip searching
+ * for matches at 'LZX_MAX_MATCH_LEN - 1' positions and
+ * write the match count header for each.
+ */
+ struct lz_match match_cache[LZX_CACHE_LENGTH +
+ LZX_MAX_MATCHES_PER_POS +
+ LZX_MAX_MATCH_LEN - 1];
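+			/*
+			 * A minimal sketch of how this cache is walked, with
+			 * illustrative names (the real consumer is the
+			 * near-optimal parser in this file):
+			 *
+			 *     struct lz_match *cache_ptr = c->match_cache;
+			 *     for (u32 pos = 0; pos < block_size; pos++) {
+			 *         u32 count = cache_ptr->length;
+			 *         struct lz_match *matches = cache_ptr + 1;
+			 *         consider_matches(matches, count);
+			 *         cache_ptr = matches + count;
+			 *     }
+			 *
+			 * where 'consider_matches()' is a stand-in for
+			 * whatever the caller does with the matches, which
+			 * are sorted by strictly increasing length. Positions
+			 * skipped after a long match still get a count header
+			 * with count == 0, so the walk never desynchronizes.
+			 */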
/* Hash table for finding length 2 matches */
pos_t hash2_tab[LZX_HASH2_LENGTH]
};
};
-/* Compute a hash value for the next 2 bytes of uncompressed data. */
-static inline u32
-lz_hash_2_bytes(const u8 *in_next)
-{
- u16 next_2_bytes = load_u16_unaligned(in_next);
- if (LZX_HASH2_ORDER == 16)
- return next_2_bytes;
- else
- return lz_hash(next_2_bytes, LZX_HASH2_ORDER);
-}
-
/*
* Structure to keep track of the current state of sending bits to the
* compressed output buffer.
u32 bitcount;
/* Pointer to the start of the output buffer. */
- le16 *start;
+ u8 *start;
/* Pointer to the position in the output buffer at which the next coding
* unit should be written. */
- le16 *next;
+ u8 *next;
- /* Pointer past the end of the output buffer. */
- le16 *end;
+ /* Pointer just past the end of the output buffer, rounded down to a
+ * 2-byte boundary. */
+ u8 *end;
};
/*
os->bitcount = 0;
os->start = buffer;
os->next = os->start;
- os->end = os->start + size / sizeof(le16);
+ os->end = os->start + (size & ~1);
}
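+/*
+ * For example (hypothetical numbers): given a 1001-byte output buffer,
+ * 'os->end' is set to 'os->start + 1000', so the 16-bit coding-unit stores
+ * below can never write into the unusable final odd byte.
+ */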
/*
* bits in @bits cannot be set. At most 17 bits can be written at once.
*
* @max_num_bits is a compile-time constant that specifies the maximum number of
- * bits that can ever be written at the call site. Currently, it is used to
- * optimize away the conditional code for writing a second 16-bit coding unit
- * when writing fewer than 17 bits.
+ * bits that can ever be written at the call site. It is used to optimize away
+ * the conditional code for writing a second 16-bit coding unit when writing
+ * fewer than 17 bits.
*
* If the output buffer space is exhausted, then the bits will be ignored, and
* lzx_flush_output() will return 0 when it gets called.
os->bitcount -= 16;
/* Write a coding unit, unless it would overflow the buffer. */
- if (os->next != os->end)
- put_unaligned_u16_le(os->bitbuf >> os->bitcount, os->next++);
+ if (os->next != os->end) {
+ put_unaligned_u16_le(os->bitbuf >> os->bitcount, os->next);
+ os->next += 2;
+ }
/* If writing 17 bits, a second coding unit might need to be
* written. But because 'max_num_bits' is a compile-time
* constant, the compiler will optimize away this code at most
* call sites. */
if (max_num_bits == 17 && os->bitcount == 16) {
- if (os->next != os->end)
- put_unaligned_u16_le(os->bitbuf, os->next++);
+ if (os->next != os->end) {
+ put_unaligned_u16_le(os->bitbuf, os->next);
+ os->next += 2;
+ }
os->bitcount = 0;
}
}
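+/*
+ * A worked example of the buffering above (illustrative; it assumes the
+ * usual accumulate-then-flush pattern, where each call shifts the new bits
+ * into the low end of 'bitbuf' and adds 'num_bits' to 'bitcount' before the
+ * flush check): starting from bitcount == 0, writing 9 bits leaves
+ * bitcount == 9 and flushes nothing; writing 9 more bits raises bitcount to
+ * 18, so one 16-bit coding unit ('bitbuf >> 2') is written and bitcount
+ * drops to 2, leaving the 2 newest bits buffered.
+ */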
/* Use when @num_bits is a compile-time constant. Otherwise use
* lzx_write_varbits(). */
static inline void
-lzx_write_bits(struct lzx_output_bitstream *os,
- const u32 bits, const unsigned num_bits)
+lzx_write_bits(struct lzx_output_bitstream *os, u32 bits, unsigned num_bits)
{
lzx_write_varbits(os, bits, num_bits, num_bits);
}
if (os->next == os->end)
return 0;
- if (os->bitcount != 0)
- put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), os->next++);
+ if (os->bitcount != 0) {
+ put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), os->next);
+ os->next += 2;
+ }
- return (const u8 *)os->next - (const u8 *)os->start;
+ return os->next - os->start;
}
/* Build the main, length, and aligned offset Huffman codes used in LZX.
* Also, note that because of the presence of the recent offsets queue (which is
* a type of adaptive state), the algorithm cannot work backwards and compute
* "cost to end" instead of "cost to beginning". Furthermore, the way the
- * algorithm handles this adaptive state in the "minimum-cost" parse is actually
+ * algorithm handles this adaptive state in the "minimum cost" parse is actually
* only an approximation. It's possible for the globally optimal, minimum cost
* path to contain a prefix, ending at a position, where that path prefix is
* *not* the minimum cost path to that position. This can happen if such a path
}
} while (cur_node != end_node);
- /* Return the match offset queue at the end of the minimum-cost path. */
+ /* Return the match offset queue at the end of the minimum cost path. */
return QUEUE(block_end);
}
* match of the very last two bytes with the
* very first two bytes, since such a match has
* an offset too large to be represented. */
- if (unlikely(max_len <
- max(LZ_HASH_REQUIRED_NBYTES, 3)))
- {
+ if (unlikely(max_len < 3)) {
in_next++;
cache_ptr->length = 0;
cache_ptr++;
lz_matchptr = cache_ptr + 1;
/* Check for a length 2 match. */
- hash2 = lz_hash_2_bytes(in_next);
+ hash2 = lz_hash_2_bytes(in_next, LZX_HASH2_ORDER);
cur_match = c->hash2_tab[hash2];
c->hash2_tab[hash2] = in_next - in_begin;
if (matchfinder_node_valid(cur_match) &&
(LZX_HASH2_ORDER == 16 ||
load_u16_unaligned(&in_begin[cur_match]) ==
- load_u16_unaligned(in_next)) &&
- in_begin[cur_match + 2] != in_next[2])
+ load_u16_unaligned(in_next)))
{
lz_matchptr->length = 2;
lz_matchptr->offset = in_next - &in_begin[cur_match];
if (unlikely(max_len > in_end - in_next)) {
max_len = in_end - in_next;
nice_len = min(max_len, nice_len);
- if (unlikely(max_len <
- max(LZ_HASH_REQUIRED_NBYTES, 3)))
- {
+ if (unlikely(max_len < 3)) {
in_next++;
cache_ptr->length = 0;
cache_ptr++;
continue;
}
}
- c->hash2_tab[lz_hash_2_bytes(in_next)] =
+ c->hash2_tab[lz_hash_2_bytes(in_next, LZX_HASH2_ORDER)] =
in_next - in_begin;
bt_matchfinder_skip_position(&c->bt_mf,
in_begin,
} while (--best_len);
}
} while (in_next < in_block_end &&
- likely(cache_ptr < c->cache_overflow_mark));
+ likely(cache_ptr < &c->match_cache[LZX_CACHE_LENGTH]));
/* We've finished running the block through the matchfinder.
* Now choose a match/literal sequence and write the block. */
c->impl = lzx_compress_lazy;
c->max_search_depth = (36 * compression_level) / 20;
- c->nice_match_length = min((72 * compression_level) / 20,
- LZX_MAX_MATCH_LEN);
+ c->nice_match_length = (72 * compression_level) / 20;
+ /* lzx_compress_lazy() needs max_search_depth >= 2 because it
+ * halves the max_search_depth when attempting a lazy match, and
+ * max_search_depth cannot be 0. */
+ if (c->max_search_depth < 2)
+ c->max_search_depth = 2;
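+		/* (For example, at compression_level == 1 the formula above
+		 * gives (36 * 1) / 20 == 1, which this clamp raises to 2.) */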
} else {
/* Normal / high compression: Use near-optimal parsing. */
/* Scale nice_match_length and max_search_depth with the
* compression level. */
c->max_search_depth = (24 * compression_level) / 50;
- c->nice_match_length = min((32 * compression_level) / 50,
- LZX_MAX_MATCH_LEN);
+ c->nice_match_length = (32 * compression_level) / 50;
/* Set a number of optimization passes appropriate for the
* compression level. */
if (compression_level >= 300)
c->num_optim_passes++;
}
-
- c->cache_overflow_mark = &c->match_cache[LZX_CACHE_LEN];
}
+ /* max_search_depth == 0 is invalid. */
+ if (c->max_search_depth < 1)
+ c->max_search_depth = 1;
+
+ if (c->nice_match_length > LZX_MAX_MATCH_LEN)
+ c->nice_match_length = LZX_MAX_MATCH_LEN;
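+	/* (For example, assuming LZX_MAX_MATCH_LEN == 257: at
+	 * compression_level == 500, the near-optimal formula above gives
+	 * nice_match_length == (32 * 500) / 50 == 320, which is clamped to
+	 * 257 here.) */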
+
lzx_init_offset_slot_fast(c);
*c_ret = c;
return 0;