X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzx-compress.c;h=5246e1a1ed3b7add7f7c543c0a240177c3c1218e;hp=056dc83bb52944c63ad9b98cdad8f966b2340e76;hb=6d0470be4d1855a0ea254e788e9ced23fc36dfb7;hpb=dbfee435692344cccd48bb4c7deb3af23ac80176

diff --git a/src/lzx-compress.c b/src/lzx-compress.c
index 056dc83b..5246e1a1 100644
--- a/src/lzx-compress.c
+++ b/src/lzx-compress.c
@@ -112,7 +112,7 @@
  *    Huffman codes that were computed for the block.
  *
  * Note: the algorithm does not yet attempt to split the input into multiple LZX
- * blocks, instead using a series of blocks of LZX_DIV_BLOCK_SIZE bytes.
+ * blocks adaptively; it instead uses fixed blocks of LZX_DIV_BLOCK_SIZE bytes.
  *
  * Fast algorithm
  * --------------
@@ -131,7 +131,7 @@
  * it possible to implement this code:
  *
  * - divsufsort (author: Yuta Mori), for the suffix array construction code,
- *   located in a separate directory (divsufsort/).
+ *   located in a separate file (divsufsort.c).
  *
  * - "Linear-Time Longest-Common-Prefix Computation in Suffix Arrays and Its
  *   Applications" (Kasai et al. 2001), for the LCP array computation.
@@ -162,8 +162,6 @@
 #include "wimlib/lz_sarray.h"
 #include "wimlib/lzx.h"
 #include "wimlib/util.h"
-#include <pthread.h>
-#include <math.h>
 #include <string.h>
 
 #ifdef ENABLE_LZX_DEBUG
@@ -171,7 +169,7 @@
 #endif
 
 typedef u32 block_cost_t;
-#define INFINITE_BLOCK_COST	((block_cost_t)~0U)
+#define INFINITE_BLOCK_COST	(~(block_cost_t)0)
 
 #define LZX_OPTIM_ARRAY_SIZE	4096
 
@@ -264,50 +262,11 @@ struct lzx_block_spec {
 	struct lzx_codes codes;
 };
 
-/*
- * An array of these structures is used during the match-choosing algorithm.
- * They correspond to consecutive positions in the window and are used to keep
- * track of the cost to reach each position, and the match/literal choices that
- * need to be chosen to reach that position.
- */
-struct lzx_optimal {
-	/* The approximate minimum cost, in bits, to reach this position in the
-	 * window which has been found so far.  */
-	block_cost_t cost;
-
-	/* The union here is just for clarity, since the fields are used in two
-	 * slightly different ways.  Initially, the @prev structure is filled in
-	 * first, and links go from later in the window to earlier in the
-	 * window.  Later, @next structure is filled in and links go from
-	 * earlier in the window to later in the window.  */
-	union {
-		struct {
-			/* Position of the start of the match or literal that
-			 * was taken to get to this position in the approximate
-			 * minimum-cost parse.  */
-			input_idx_t link;
-
-			/* Offset (as in an LZ (length, offset) pair) of the
-			 * match or literal that was taken to get to this
-			 * position in the approximate minimum-cost parse.  */
-			input_idx_t match_offset;
-		} prev;
-		struct {
-			/* Position at which the match or literal starting at
-			 * this position ends in the minimum-cost parse.  */
-			input_idx_t link;
-
-			/* Offset (as in an LZ (length, offset) pair) of the
-			 * match or literal starting at this position in the
-			 * approximate minimum-cost parse.  */
-			input_idx_t match_offset;
-		} next;
-	};
-
-	/* The match offset LRU queue that will exist when the approximate
-	 * minimum-cost path to reach this position is taken.  */
-	struct lzx_lru_queue queue;
-};
+/* Include template for the match-choosing algorithm.  */
+#define LZ_COMPRESSOR		struct lzx_compressor
+#define LZ_ADAPTIVE_STATE	struct lzx_lru_queue
+struct lzx_compressor;
+#include "wimlib/lz_optimal.h"
 
 /* State of the LZX compressor.  */
 struct lzx_compressor {
@@ -326,8 +285,8 @@ struct lzx_compressor {
 	 * chunks.
 	 *
 	 * We reserve a few extra bytes to potentially allow reading off the end
-	 * of the array in the match-finding code for optimization purposes.
-	 */
+	 * of the array in the match-finding code for optimization purposes
+	 * (currently only needed for the hash chain match-finder).  */
 	u8 *window;
 
 	/* Number of bytes of data to be compressed, which is the number of
@@ -387,23 +346,8 @@ struct lzx_compressor {
 	unsigned cached_matches_pos;
 	bool matches_cached;
 
-	/* Slow algorithm only: Temporary space used for match-choosing
-	 * algorithm.
-	 *
-	 * The size of this array must be at least LZX_MAX_MATCH_LEN but
-	 * otherwise is arbitrary.  More space simply allows the match-choosing
-	 * algorithm to potentially find better matches (depending on the input,
-	 * as always).  */
-	struct lzx_optimal *optimum;
-
-	/* Slow algorithm only: Variables used by the match-choosing algorithm.
-	 *
-	 * When matches have been chosen, optimum_cur_idx is set to the position
-	 * in the window of the next match/literal to return and optimum_end_idx
-	 * is set to the position in the window at the end of the last
-	 * match/literal to return.  */
-	u32 optimum_cur_idx;
-	u32 optimum_end_idx;
+	/* Match chooser.  */
+	struct lz_match_chooser mc;
 };
 
 /* Returns the LZX position slot that corresponds to a given match offset,
@@ -471,13 +415,18 @@ lzx_make_huffman_codes(const struct lzx_freqs *freqs,
 }
 
 /*
- * Output an LZX match.
+ * Output a precomputed LZX match.
  *
- * @out:         The bitstream to write the match to.
- * @block_type:  The type of the LZX block (LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM)
- * @match:	 The match.
- * @codes:	 Pointer to a structure that contains the codewords for the
- *		 main, length, and aligned offset Huffman codes.
+ * @out:
+ *	The bitstream to which to write the match.
+ * @block_type:
+ *	The type of the LZX block (LZX_BLOCKTYPE_ALIGNED or
+ *	LZX_BLOCKTYPE_VERBATIM).
+ * @match:
+ *	The match, as a (length, offset) pair.
+ * @codes:
+ *	Pointer to a structure that contains the codewords for the main, length,
+ *	and aligned offset Huffman codes for the current LZX compressed block.
  */
 static void
 lzx_write_match(struct output_bitstream *out, int block_type,
@@ -507,9 +456,6 @@ lzx_write_match(struct output_bitstream *out, int block_type,
 	 * MIN_MATCH_LEN. */
 	if (match_len_minus_2 < LZX_NUM_PRIMARY_LENS) {
 		len_header = match_len_minus_2;
-		/* No length footer-- mark it with a special
-		 * value. */
-		len_footer = (unsigned)(-1);
 	} else {
 		len_header = LZX_NUM_PRIMARY_LENS;
 		len_footer = match_len_minus_2 - LZX_NUM_PRIMARY_LENS;
@@ -529,10 +475,9 @@ lzx_write_match(struct output_bitstream *out, int block_type,
 
 	/* If there is a length footer, output it using the
 	 * length Huffman code. */
-	if (len_footer != (unsigned)(-1)) {
+	if (len_header == LZX_NUM_PRIMARY_LENS)
 		bitstream_put_bits(out, codes->codewords.len[len_footer],
 				   codes->lens.len[len_footer]);
-	}
 
 	num_extra_bits = lzx_get_num_extra_bits(position_slot);
 
@@ -557,6 +502,16 @@ lzx_write_match(struct output_bitstream *out, int block_type,
 	}
 }
 
+/* Output an LZX literal (encoded with the main Huffman code).  */
+static void
+lzx_write_literal(struct output_bitstream *out, u8 literal,
+		  const struct lzx_codes *codes)
+{
+	bitstream_put_bits(out,
+			   codes->codewords.main[literal],
+			   codes->lens.main[literal]);
+}
+
 static unsigned
 lzx_build_precode(const u8 lens[restrict],
 		  const u8 prev_lens[restrict],
@@ -696,23 +651,33 @@ lzx_build_precode(const u8 lens[restrict],
 }
 
 /*
- * Writes a compressed Huffman code to the output, preceded by the precode for
- * it.
+ * Output a Huffman code in the compressed form used in LZX.
+ *
+ * The Huffman code is represented in the output as a logical series of codeword
+ * lengths from which the Huffman code, which must be in canonical form, can be
+ * reconstructed.
+ *
+ * The codeword lengths are themselves compressed using a separate Huffman code,
+ * the "precode", which contains a symbol for each possible codeword length in
+ * the larger code as well as several special symbols to represent repeated
+ * codeword lengths (a form of run-length encoding).  The precode is itself
+ * constructed in canonical form, and its codeword lengths are represented
+ * literally in LZX_PRECODE_NUM_SYMBOLS (= 20) 4-bit fields that immediately
+ * precede the compressed codeword lengths of the larger code.
  *
- * The Huffman code is represented in the output as a series of path lengths
- * from which the canonical Huffman code can be reconstructed.  The path lengths
- * themselves are compressed using a separate Huffman code, the precode, which
- * consists of LZX_PRECODE_NUM_SYMBOLS (= 20) symbols that cover all possible
- * code lengths, plus extra codes for repeated lengths.  The path lengths of the
- * precode precede the path lengths of the larger code and are uncompressed,
- * consisting of 20 entries of 4 bits each.
+ * Furthermore, the codeword lengths of the larger code are actually represented
+ * as deltas from the codeword lengths of the corresponding code in the previous
+ * block.
  *
- * @out:		Bitstream to write the code to.
- * @lens:		The code lengths for the Huffman code, indexed by symbol.
- * @prev_lens:		Code lengths for this Huffman code, indexed by symbol,
- *			in the *previous block*, or all zeroes if this is the
- *			first block.
- * @num_syms:		The number of symbols in the code.
+ * @out:
+ *	Bitstream to which to write the compressed Huffman code.
+ * @lens:
+ *	The codeword lengths, indexed by symbol, in the Huffman code.
+ * @prev_lens:
+ *	The codeword lengths, indexed by symbol, in the corresponding Huffman
+ *	code in the previous block, or all zeroes if this is the first block.
+ * @num_syms:
+ *	The number of symbols in the Huffman code.
  */
 static void
 lzx_write_compressed_code(struct output_bitstream *out,
@@ -771,20 +736,22 @@ lzx_write_compressed_code(struct output_bitstream *out,
 }
 
 /*
- * Writes all compressed matches and literal bytes in an LZX block to the the
- * output bitstream.
+ * Write all matches and literal bytes (which were precomputed) in an LZX
+ * compressed block to the output bitstream in the final compressed
+ * representation.
  *
  * @ostream
  *	The output bitstream.
  * @block_type
- *	The type of the block (LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM).
+ *	The chosen type of the LZX compressed block (LZX_BLOCKTYPE_ALIGNED or
+ *	LZX_BLOCKTYPE_VERBATIM).
  * @match_tab
- *	The array of matches/literals that will be output (length @match_count).
+ *	The array of matches/literals to output.
  * @match_count
- *	Number of matches/literals to be output.
+ *	Number of matches/literals to output (length of @match_tab).
  * @codes
- *	Pointer to a structure that contains the codewords for the main, length,
- *	and aligned offset Huffman codes.
+ *	The main, length, and aligned offset Huffman codes for the current
+ *	LZX compressed block.
  */
 static void
 lzx_write_matches_and_literals(struct output_bitstream *ostream,
@@ -796,18 +763,13 @@ lzx_write_matches_and_literals(struct output_bitstream *ostream,
 	for (unsigned i = 0; i < match_count; i++) {
 		struct lzx_match match = match_tab[i];
 
-		/* High bit of the match indicates whether the match is an
-		 * actual match (1) or a literal uncompressed byte (0)  */
-		if (match.data & 0x80000000) {
-			/* match */
-			lzx_write_match(ostream, block_type,
-					match, codes);
-		} else {
-			/* literal byte */
-			bitstream_put_bits(ostream,
-					   codes->codewords.main[match.data],
-					   codes->lens.main[match.data]);
-		}
+		/* The high bit of the 32-bit intermediate representation
+		 * indicates whether the item is an actual LZ-style match (1) or
+		 * a literal byte (0).  */
+		if (match.data & 0x80000000)
+			lzx_write_match(ostream, block_type, match, codes);
+		else
+			lzx_write_literal(ostream, match.data, codes);
 	}
 }
 
@@ -996,8 +958,8 @@ lzx_tally_match(unsigned match_len, unsigned match_offset,
 	/* The match offset shall be encoded as a position slot (itself encoded
 	 * as part of the main symbol) and a position footer.  */
 	position_slot = lzx_get_position_slot(match_offset, queue);
-	position_footer = (match_offset + LZX_OFFSET_OFFSET) &
-				((1U << lzx_get_num_extra_bits(position_slot)) - 1);
+	position_footer = (match_offset + LZX_OFFSET_OFFSET) -
+				lzx_position_base[position_slot];
 
 	/* The match length shall be encoded as a length header (itself encoded
 	 * as part of the main symbol) and an optional length footer.  */
@@ -1172,7 +1134,7 @@ lzx_set_costs(struct lzx_compressor * ctx, const struct lzx_lens * lens)
 /* Tell the match-finder to skip the specified number of bytes (@n) in the
  * input.  */
 static void
-lzx_lz_skip_bytes(struct lzx_compressor *ctx, unsigned n)
+lzx_lz_skip_bytes(struct lzx_compressor *ctx, input_idx_t n)
 {
 	LZX_ASSERT(n <= ctx->match_window_end - ctx->match_window_pos);
 	if (ctx->matches_cached) {
@@ -1193,14 +1155,14 @@ lzx_lz_skip_bytes(struct lzx_compressor *ctx, unsigned n)
 
 /* Retrieve a list of matches available at the next position in the input.
  *
- * The matches are written to ctx->matches in decreasing order of length, and
- * the return value is the number of matches found.  */
-static unsigned
+ * A pointer to the matches array is written into @matches_ret, and the return
+ * value is the number of matches found.  */
+static u32
 lzx_lz_get_matches_caching(struct lzx_compressor *ctx,
 			   const struct lzx_lru_queue *queue,
 			   struct raw_match **matches_ret)
 {
-	unsigned num_matches;
+	u32 num_matches;
 	struct raw_match *matches;
 
 	LZX_ASSERT(ctx->match_window_pos <= ctx->match_window_end);
@@ -1224,7 +1186,7 @@ lzx_lz_get_matches_caching(struct lzx_compressor *ctx,
 	 * if it is not the whole window.  */
 	if (ctx->match_window_end < ctx->window_size) {
 		unsigned maxlen = ctx->match_window_end - ctx->match_window_pos;
-		for (unsigned i = 0; i < num_matches; i++)
+		for (u32 i = 0; i < num_matches; i++)
 			if (matches[i].len > maxlen)
 				matches[i].len = maxlen;
 	}
@@ -1236,7 +1198,7 @@ lzx_lz_get_matches_caching(struct lzx_compressor *ctx,
 #endif
 
 #ifdef ENABLE_LZX_DEBUG
-	for (unsigned i = 0; i < num_matches; i++) {
+	for (u32 i = 0; i < num_matches; i++) {
 		LZX_ASSERT(matches[i].len >= LZX_MIN_MATCH_LEN);
 		LZX_ASSERT(matches[i].len <= LZX_MAX_MATCH_LEN);
 		LZX_ASSERT(matches[i].len <= ctx->match_window_end - ctx->match_window_pos);
@@ -1252,275 +1214,61 @@ lzx_lz_get_matches_caching(struct lzx_compressor *ctx,
 	return num_matches;
 }
 
-/*
- * Reverse the linked list of near-optimal matches so that they can be returned
- * in forwards order.
- *
- * Returns the first match in the list.
- */
-static struct raw_match
-lzx_lz_reverse_near_optimal_match_list(struct lzx_compressor *ctx,
-				       unsigned cur_pos)
+static u32
+lzx_get_prev_literal_cost(struct lzx_compressor *ctx,
+			  struct lzx_lru_queue *queue)
 {
-	unsigned prev_link, saved_prev_link;
-	unsigned prev_match_offset, saved_prev_match_offset;
-
-	ctx->optimum_end_idx = cur_pos;
-
-	saved_prev_link = ctx->optimum[cur_pos].prev.link;
-	saved_prev_match_offset = ctx->optimum[cur_pos].prev.match_offset;
-
-	do {
-		prev_link = saved_prev_link;
-		prev_match_offset = saved_prev_match_offset;
-
-		saved_prev_link = ctx->optimum[prev_link].prev.link;
-		saved_prev_match_offset = ctx->optimum[prev_link].prev.match_offset;
-
-		ctx->optimum[prev_link].next.link = cur_pos;
-		ctx->optimum[prev_link].next.match_offset = prev_match_offset;
-
-		cur_pos = prev_link;
-	} while (cur_pos != 0);
-
-	ctx->optimum_cur_idx = ctx->optimum[0].next.link;
+	return lzx_literal_cost(ctx->window[ctx->match_window_pos - 1],
+				&ctx->costs);
+}
 
-	return (struct raw_match)
-		{ .len = ctx->optimum_cur_idx,
-		  .offset = ctx->optimum[0].next.match_offset,
-		};
+static u32
+lzx_get_match_cost(struct lzx_compressor *ctx,
+		   struct lzx_lru_queue *queue,
+		   input_idx_t length, input_idx_t offset)
+{
+	return lzx_match_cost(length, offset, &ctx->costs, queue);
 }
 
-/*
- * lzx_lz_get_near_optimal_match() -
- *
- * Choose the optimal match or literal to use at the next position in the input.
- *
- * Unlike a greedy parser that always takes the longest match, or even a
- * parser with one match/literal look-ahead like zlib, the algorithm used here
- * may look ahead many matches/literals to determine the optimal match/literal to
- * output next.  The motivation is that the compression ratio is improved if the
- * compressor can do things like use a shorter-than-possible match in order to
- * allow a longer match later, and also take into account the Huffman code cost
- * model rather than simply assuming that longer is better.
- *
- * Still, this is not truly an optimal parser because very long matches are
- * taken immediately, and the raw match-finder takes some shortcuts.  This is
- * done to avoid considering many different alternatives that are unlikely to
- * be significantly better.
- *
- * This algorithm is based on that used in 7-Zip's DEFLATE encoder.
- *
- * Each call to this function does one of two things:
- *
- * 1. Build a near-optimal sequence of matches/literals, up to some point, that
- *    will be returned by subsequent calls to this function, then return the
- *    first one.
- *
- * OR
- *
- * 2. Return the next match/literal previously computed by a call to this
- *    function;
- *
- * This function relies on the following state in the compressor context:
- *
- *	ctx->window	     (read-only: preprocessed data being compressed)
- *	ctx->cost	     (read-only: cost model to use)
- *	ctx->optimum	     (internal state; leave uninitialized)
- *	ctx->optimum_cur_idx (must set to 0 before first call)
- *	ctx->optimum_end_idx (must set to 0 before first call)
- *
- *	Plus any state used by the raw match-finder.
- *
- * The return value is a (length, offset) pair specifying the match or literal
- * chosen.  For literals, the length is less than LZX_MIN_MATCH_LEN and the
- * offset is meaningless.
- */
 static struct raw_match
-lzx_lz_get_near_optimal_match(struct lzx_compressor * ctx)
+lzx_lz_get_near_optimal_match(struct lzx_compressor *ctx)
 {
-	unsigned num_possible_matches;
-	struct raw_match *possible_matches;
-	struct raw_match match;
-	unsigned longest_match_len;
-
-	if (ctx->optimum_cur_idx != ctx->optimum_end_idx) {
-		/* Case 2: Return the next match/literal already found.  */
-		match.len = ctx->optimum[ctx->optimum_cur_idx].next.link -
-				    ctx->optimum_cur_idx;
-		match.offset = ctx->optimum[ctx->optimum_cur_idx].next.match_offset;
-
-		ctx->optimum_cur_idx = ctx->optimum[ctx->optimum_cur_idx].next.link;
-		return match;
-	}
-
-	/* Case 1:  Compute a new list of matches/literals to return.  */
-
-	ctx->optimum_cur_idx = 0;
-	ctx->optimum_end_idx = 0;
-
-	/* Get matches at this position.  */
-	num_possible_matches = lzx_lz_get_matches_caching(ctx, &ctx->queue, &possible_matches);
-
-	/* If no matches found, return literal.  */
-	if (num_possible_matches == 0)
-		return (struct raw_match){ .len = 0 };
-
-	/* The matches that were found are sorted in decreasing order by length.
-	 * Get the length of the longest one.  */
-	longest_match_len = possible_matches[0].len;
-
-	/* Greedy heuristic:  if the longest match that was found is greater
-	 * than the number of fast bytes, return it immediately; don't both
-	 * doing more work.  */
-	if (longest_match_len > ctx->params.alg_params.slow.num_fast_bytes) {
-		lzx_lz_skip_bytes(ctx, longest_match_len - 1);
-		return possible_matches[0];
-	}
-
-	/* Calculate the cost to reach the next position by outputting a
-	 * literal.  */
-	ctx->optimum[0].queue = ctx->queue;
-	ctx->optimum[1].queue = ctx->optimum[0].queue;
-	ctx->optimum[1].cost = lzx_literal_cost(ctx->window[ctx->match_window_pos],
-						&ctx->costs);
-	ctx->optimum[1].prev.link = 0;
-
-	/* Calculate the cost to reach any position up to and including that
-	 * reached by the longest match, using the shortest (i.e. closest) match
-	 * that reaches each position.  */
-	BUILD_BUG_ON(LZX_MIN_MATCH_LEN != 2);
-	for (unsigned len = LZX_MIN_MATCH_LEN, match_idx = num_possible_matches - 1;
-	     len <= longest_match_len; len++) {
-
-		LZX_ASSERT(match_idx < num_possible_matches);
-
-		ctx->optimum[len].queue = ctx->optimum[0].queue;
-		ctx->optimum[len].prev.link = 0;
-		ctx->optimum[len].prev.match_offset = possible_matches[match_idx].offset;
-		ctx->optimum[len].cost = lzx_match_cost(len,
-							possible_matches[match_idx].offset,
-							&ctx->costs,
-							&ctx->optimum[len].queue);
-		if (len == possible_matches[match_idx].len)
-			match_idx--;
-	}
-
-	unsigned cur_pos = 0;
-
-	/* len_end: greatest index forward at which costs have been calculated
-	 * so far  */
-	unsigned len_end = longest_match_len;
-
-	for (;;) {
-		/* Advance to next position.  */
-		cur_pos++;
-
-		if (cur_pos == len_end || cur_pos == LZX_OPTIM_ARRAY_SIZE)
-			return lzx_lz_reverse_near_optimal_match_list(ctx, cur_pos);
-
-		/* retrieve the number of matches available at this position  */
-		num_possible_matches = lzx_lz_get_matches_caching(ctx, &ctx->optimum[cur_pos].queue,
-								  &possible_matches);
-
-		unsigned new_len = 0;
-
-		if (num_possible_matches != 0) {
-			new_len = possible_matches[0].len;
-
-			/* Greedy heuristic:  if we found a match greater than
-			 * the number of fast bytes, stop immediately.  */
-			if (new_len > ctx->params.alg_params.slow.num_fast_bytes) {
-
-				/* Build the list of matches to return and get
-				 * the first one.  */
-				match = lzx_lz_reverse_near_optimal_match_list(ctx, cur_pos);
-
-				/* Append the long match to the end of the list.  */
-				ctx->optimum[cur_pos].next.match_offset =
-					possible_matches[0].offset;
-				ctx->optimum[cur_pos].next.link = cur_pos + new_len;
-				ctx->optimum_end_idx = cur_pos + new_len;
-
-				/* Skip over the remaining bytes of the long match.  */
-				lzx_lz_skip_bytes(ctx, new_len - 1);
-
-				/* Return first match in the list  */
-				return match;
-			}
-		}
-
-		/* Consider proceeding with a literal byte.  */
-		block_cost_t cur_cost = ctx->optimum[cur_pos].cost;
-		block_cost_t cur_plus_literal_cost = cur_cost +
-			lzx_literal_cost(ctx->window[ctx->match_window_pos - 1],
-					 &ctx->costs);
-		if (cur_plus_literal_cost < ctx->optimum[cur_pos + 1].cost) {
-			ctx->optimum[cur_pos + 1].cost = cur_plus_literal_cost;
-			ctx->optimum[cur_pos + 1].prev.link = cur_pos;
-			ctx->optimum[cur_pos + 1].queue = ctx->optimum[cur_pos].queue;
-		}
-
-		if (num_possible_matches == 0)
-			continue;
-
-		/* Consider proceeding with a match.  */
-
-		while (len_end < cur_pos + new_len)
-			ctx->optimum[++len_end].cost = INFINITE_BLOCK_COST;
-
-		for (unsigned len = LZX_MIN_MATCH_LEN, match_idx = num_possible_matches - 1;
-		     len <= new_len; len++) {
-			LZX_ASSERT(match_idx < num_possible_matches);
-			struct lzx_lru_queue q = ctx->optimum[cur_pos].queue;
-			block_cost_t cost = cur_cost + lzx_match_cost(len,
-								      possible_matches[match_idx].offset,
-								      &ctx->costs,
-								      &q);
-
-			if (cost < ctx->optimum[cur_pos + len].cost) {
-				ctx->optimum[cur_pos + len].cost = cost;
-				ctx->optimum[cur_pos + len].prev.link = cur_pos;
-				ctx->optimum[cur_pos + len].prev.match_offset =
-						possible_matches[match_idx].offset;
-				ctx->optimum[cur_pos + len].queue = q;
-			}
-
-			if (len == possible_matches[match_idx].len)
-				match_idx--;
-		}
-	}
+	return lz_get_near_optimal_match(&ctx->mc,
+					 lzx_lz_get_matches_caching,
+					 lzx_lz_skip_bytes,
+					 lzx_get_prev_literal_cost,
+					 lzx_get_match_cost,
+					 ctx,
+					 &ctx->queue);
 }
 
-/*
- * Set default symbol costs.
- */
+/* Set default symbol costs for the LZX Huffman codes.  */
 static void
 lzx_set_default_costs(struct lzx_costs * costs, unsigned num_main_syms)
 {
 	unsigned i;
 
-	/* Literal symbols  */
+	/* Main code (part 1): Literal symbols  */
 	for (i = 0; i < LZX_NUM_CHARS; i++)
 		costs->main[i] = 8;
 
-	/* Match header symbols  */
+	/* Main code (part 2): Match header symbols  */
 	for (; i < num_main_syms; i++)
 		costs->main[i] = 10;
 
-	/* Length symbols  */
+	/* Length code  */
 	for (i = 0; i < LZX_LENCODE_NUM_SYMBOLS; i++)
 		costs->len[i] = 8;
 
-	/* Aligned offset symbols  */
+	/* Aligned offset code  */
 	for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++)
 		costs->aligned[i] = 3;
 }
 
-/* Given the frequencies of symbols in a compressed block and the corresponding
- * Huffman codes, return LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM if an
- * aligned offset or verbatim block, respectively, will take fewer bits to
- * output.  */
+/* Given the frequencies of symbols in an LZX-compressed block and the
+ * corresponding Huffman codes, return LZX_BLOCKTYPE_ALIGNED or
+ * LZX_BLOCKTYPE_VERBATIM if an aligned offset or verbatim block, respectively,
+ * will take fewer bits to output.  */
 static int
 lzx_choose_verbatim_or_aligned(const struct lzx_freqs * freqs,
 			       const struct lzx_codes * codes)
@@ -1530,8 +1278,8 @@ lzx_choose_verbatim_or_aligned(const struct lzx_freqs * freqs,
 
 	/* Verbatim blocks have a constant 3 bits per position footer.  Aligned
 	 * offset blocks have an aligned offset symbol per position footer, plus
-	 * an extra 24 bits to output the lengths necessary to reconstruct the
-	 * aligned offset code itself.  */
+	 * an extra 24 bits per block to output the lengths necessary to
+	 * reconstruct the aligned offset code itself.  */
 	for (unsigned i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
 		verbatim_cost += 3 * freqs->aligned[i];
 		aligned_cost += codes->lens.aligned[i] * freqs->aligned[i];
@@ -1544,8 +1292,8 @@ lzx_choose_verbatim_or_aligned(const struct lzx_freqs * freqs,
 }
 
 /* Find a near-optimal sequence of matches/literals with which to output the
- * specified LZX block, then set its type to that which has the minimum cost to
- * output.  */
+ * specified LZX block, then set the block's type to that which has the minimum
+ * cost to output (either verbatim or aligned).  */
 static void
 lzx_optimize_block(struct lzx_compressor *ctx, struct lzx_block_spec *spec,
 		   unsigned num_passes)
@@ -1580,9 +1328,44 @@ lzx_optimize_block(struct lzx_compressor *ctx, struct lzx_block_spec *spec,
 
 			raw_match = lzx_lz_get_near_optimal_match(ctx);
 			if (raw_match.len >= LZX_MIN_MATCH_LEN) {
-				lzx_match.data = lzx_tally_match(raw_match.len, raw_match.offset,
-								 &freqs, &ctx->queue);
-				i += raw_match.len;
+				if (unlikely(raw_match.len == LZX_MIN_MATCH_LEN &&
+					     raw_match.offset == ctx->max_window_size -
+								 LZX_MIN_MATCH_LEN))
+				{
+					/* Degenerate case where the parser
+					 * generated the minimum match length
+					 * with the maximum offset.  There
+					 * aren't actually enough position slots
+					 * to represent this offset, as noted in
+					 * the comments in
+					 * lzx_get_num_main_syms(), so we cannot
+					 * allow it.  Use literals instead.
+					 *
+					 * Note that this case only occurs if
+					 * the match-finder can generate matches
+					 * to the very start of the window.  The
+					 * suffix array match-finder can,
+					 * although typical hash chain and
+					 * binary tree match-finders use 0 as a
+					 * null value and therefore cannot
+					 * generate such matches.  */
+					BUILD_BUG_ON(LZX_MIN_MATCH_LEN != 2);
+					lzx_match.data = lzx_tally_literal(ctx->window[i],
+									   &freqs);
+					i += 1;
+					ctx->chosen_matches[spec->chosen_matches_start_pos +
+							    spec->num_chosen_matches++]
+							    = lzx_match;
+					lzx_match.data = lzx_tally_literal(ctx->window[i],
+									   &freqs);
+					i += 1;
+				} else {
+					lzx_match.data = lzx_tally_match(raw_match.len,
+									 raw_match.offset,
+									 &freqs,
+									 &ctx->queue);
+					i += raw_match.len;
+				}
 			} else {
 				lzx_match.data = lzx_tally_literal(ctx->window[i], &freqs);
 				i += 1;
@@ -1605,8 +1388,7 @@ static void
 lzx_optimize_blocks(struct lzx_compressor *ctx)
 {
 	lzx_lru_queue_init(&ctx->queue);
-	ctx->optimum_cur_idx = 0;
-	ctx->optimum_end_idx = 0;
+	lz_match_chooser_begin(&ctx->mc);
 
 	const unsigned num_passes = ctx->params.alg_params.slow.num_optim_passes;
 
@@ -1627,6 +1409,9 @@ lzx_prepare_blocks(struct lzx_compressor * ctx)
 	/* Set up a default cost model.  */
 	lzx_set_default_costs(&ctx->costs, ctx->num_main_syms);
 
+	/* TODO: The compression ratio could be slightly improved by performing
+	 * data-dependent block splitting instead of using fixed-size blocks.
+	 * Doing so well is a computationally hard problem, however.  */
 	ctx->num_blocks = DIV_ROUND_UP(ctx->window_size, LZX_DIV_BLOCK_SIZE);
 	for (unsigned i = 0; i < ctx->num_blocks; i++) {
 		unsigned pos = LZX_DIV_BLOCK_SIZE * i;
@@ -1844,53 +1629,6 @@ lzx_compress(const void *uncompressed_data, size_t uncompressed_size,
 	return compressed_size;
 }
 
-static bool
-lzx_params_valid(const struct wimlib_lzx_compressor_params *params)
-{
-	/* Validate parameters.  */
-	if (params->hdr.size != sizeof(struct wimlib_lzx_compressor_params)) {
-		LZX_DEBUG("Invalid parameter structure size!");
-		return false;
-	}
-
-	if (params->algorithm != WIMLIB_LZX_ALGORITHM_SLOW &&
-	    params->algorithm != WIMLIB_LZX_ALGORITHM_FAST)
-	{
-		LZX_DEBUG("Invalid algorithm.");
-		return false;
-	}
-
-	if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
-		if (params->alg_params.slow.num_optim_passes < 1)
-		{
-			LZX_DEBUG("Invalid number of optimization passes!");
-			return false;
-		}
-
-		if (params->alg_params.slow.main_nostat_cost < 1 ||
-		    params->alg_params.slow.main_nostat_cost > 16)
-		{
-			LZX_DEBUG("Invalid main_nostat_cost!");
-			return false;
-		}
-
-		if (params->alg_params.slow.len_nostat_cost < 1 ||
-		    params->alg_params.slow.len_nostat_cost > 16)
-		{
-			LZX_DEBUG("Invalid len_nostat_cost!");
-			return false;
-		}
-
-		if (params->alg_params.slow.aligned_nostat_cost < 1 ||
-		    params->alg_params.slow.aligned_nostat_cost > 8)
-		{
-			LZX_DEBUG("Invalid aligned_nostat_cost!");
-			return false;
-		}
-	}
-	return true;
-}
-
 static void
 lzx_free_compressor(void *_ctx)
 {
@@ -1899,7 +1637,7 @@ lzx_free_compressor(void *_ctx)
 	if (ctx) {
 		FREE(ctx->chosen_matches);
 		FREE(ctx->cached_matches);
-		FREE(ctx->optimum);
+		lz_match_chooser_destroy(&ctx->mc);
 		lz_sarray_destroy(&ctx->lz_sarray);
 		FREE(ctx->block_specs);
 		FREE(ctx->prev_tab);
@@ -1908,13 +1646,63 @@ lzx_free_compressor(void *_ctx)
 	}
 }
 
+static const struct wimlib_lzx_compressor_params lzx_fast_default = {
+	.hdr = {
+		.size = sizeof(struct wimlib_lzx_compressor_params),
+	},
+	.algorithm = WIMLIB_LZX_ALGORITHM_FAST,
+	.use_defaults = 0,
+	.alg_params = {
+		.fast = {
+		},
+	},
+};
+static const struct wimlib_lzx_compressor_params lzx_slow_default = {
+	.hdr = {
+		.size = sizeof(struct wimlib_lzx_compressor_params),
+	},
+	.algorithm = WIMLIB_LZX_ALGORITHM_SLOW,
+	.use_defaults = 0,
+	.alg_params = {
+		.slow = {
+			.use_len2_matches = 1,
+			.nice_match_length = 32,
+			.num_optim_passes = 2,
+			.max_search_depth = 50,
+			.max_matches_per_pos = 3,
+			.main_nostat_cost = 15,
+			.len_nostat_cost = 15,
+			.aligned_nostat_cost = 7,
+		},
+	},
+};
+
+static const struct wimlib_lzx_compressor_params *
+lzx_get_params(const struct wimlib_compressor_params_header *_params)
+{
+	const struct wimlib_lzx_compressor_params *params =
+		(const struct wimlib_lzx_compressor_params*)_params;
+
+	if (params == NULL) {
+		LZX_DEBUG("Using default algorithm and parameters.");
+		params = &lzx_slow_default;
+	} else {
+		if (params->use_defaults) {
+			if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW)
+				params = &lzx_slow_default;
+			else
+				params = &lzx_fast_default;
+		}
+	}
+	return params;
+}
+
 static int
 lzx_create_compressor(size_t window_size,
 		      const struct wimlib_compressor_params_header *_params,
 		      void **ctx_ret)
 {
-	const struct wimlib_lzx_compressor_params *params =
-		(const struct wimlib_lzx_compressor_params*)_params;
+	const struct wimlib_lzx_compressor_params *params = lzx_get_params(_params);
 	struct lzx_compressor *ctx;
 
 	LZX_DEBUG("Allocating LZX context...");
@@ -1922,52 +1710,6 @@ lzx_create_compressor(size_t window_size,
 	if (!lzx_window_size_valid(window_size))
 		return WIMLIB_ERR_INVALID_PARAM;
 
-	static const struct wimlib_lzx_compressor_params fast_default = {
-		.hdr = {
-			.size = sizeof(struct wimlib_lzx_compressor_params),
-		},
-		.algorithm = WIMLIB_LZX_ALGORITHM_FAST,
-		.use_defaults = 0,
-		.alg_params = {
-			.fast = {
-			},
-		},
-	};
-	static const struct wimlib_lzx_compressor_params slow_default = {
-		.hdr = {
-			.size = sizeof(struct wimlib_lzx_compressor_params),
-		},
-		.algorithm = WIMLIB_LZX_ALGORITHM_SLOW,
-		.use_defaults = 0,
-		.alg_params = {
-			.slow = {
-				.use_len2_matches = 1,
-				.num_fast_bytes = 32,
-				.num_optim_passes = 2,
-				.max_search_depth = 50,
-				.max_matches_per_pos = 3,
-				.main_nostat_cost = 15,
-				.len_nostat_cost = 15,
-				.aligned_nostat_cost = 7,
-			},
-		},
-	};
-
-	if (params) {
-		if (!lzx_params_valid(params))
-			return WIMLIB_ERR_INVALID_PARAM;
-	} else {
-		LZX_DEBUG("Using default algorithm and parameters.");
-		params = &slow_default;
-	}
-
-	if (params->use_defaults) {
-		if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW)
-			params = &slow_default;
-		else
-			params = &fast_default;
-	}
-
 	LZX_DEBUG("Allocating memory.");
 
 	ctx = CALLOC(1, sizeof(struct lzx_compressor));
@@ -2006,9 +1748,10 @@ lzx_create_compressor(size_t window_size,
 	}
 
 	if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
-		ctx->optimum = MALLOC((LZX_OPTIM_ARRAY_SIZE + LZX_MAX_MATCH_LEN) *
-				       sizeof(ctx->optimum[0]));
-		if (ctx->optimum == NULL)
+		if (!lz_match_chooser_init(&ctx->mc,
+					   LZX_OPTIM_ARRAY_SIZE,
+					   params->alg_params.slow.nice_match_length,
+					   LZX_MAX_MATCH_LEN))
 			goto oom;
 	}
 
@@ -2042,7 +1785,95 @@ oom:
 	return WIMLIB_ERR_NOMEM;
 }
 
+static u64
+lzx_get_needed_memory(size_t max_block_size,
+		      const struct wimlib_compressor_params_header *_params)
+{
+	/* Sum the sizes of the per-compressor buffers for the given block size
+	 * and parameters.  The block size is widened to u64 up front so that
+	 * none of the products below can wrap in a 32-bit size_t.  */
+	const struct wimlib_lzx_compressor_params *params = lzx_get_params(_params);
+	const u64 block_size = max_block_size;
+	u64 size = sizeof(struct lzx_compressor);
+
+	/* max_block_size + 12 bytes: presumably the window buffer -- TODO confirm.  */
+	size += block_size + 12;
+
+	/* One block specification per LZX_DIV_BLOCK_SIZE-byte division.  */
+	size += DIV_ROUND_UP(block_size, LZX_DIV_BLOCK_SIZE) *
+		sizeof(((struct lzx_compressor*)0)->block_specs[0]);
+
+	if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
+		u32 cache_per_pos = params->alg_params.slow.max_matches_per_pos;
+		if (cache_per_pos > LZX_MAX_CACHE_PER_POS)
+			cache_per_pos = LZX_MAX_CACHE_PER_POS;
+
+		size += block_size * sizeof(((struct lzx_compressor*)0)->chosen_matches[0]);
+		size += lz_sarray_get_needed_memory(max_block_size);
+		size += lz_match_chooser_get_needed_memory(LZX_OPTIM_ARRAY_SIZE,
+							   params->alg_params.slow.nice_match_length,
+							   LZX_MAX_MATCH_LEN);
+		size += block_size * (cache_per_pos + 1) *
+			sizeof(((struct lzx_compressor*)0)->cached_matches[0]);
+	} else {
+		size += block_size * sizeof(((struct lzx_compressor*)0)->prev_tab[0]);
+	}
+	return size;
+}
+
+static bool
+lzx_params_valid(const struct wimlib_compressor_params_header *_params)
+{
+	/* Validate a caller-supplied LZX parameter structure.  The structure
+	 * size and algorithm are always checked; the slow-algorithm tunables
+	 * are range-checked only when that algorithm is selected without
+	 * use_defaults.  Returns true if everything checked is acceptable.  */
+	const struct wimlib_lzx_compressor_params *p =
+		(const struct wimlib_lzx_compressor_params*)_params;
+
+	if (p->hdr.size != sizeof(struct wimlib_lzx_compressor_params)) {
+		LZX_DEBUG("Invalid parameter structure size!");
+		return false;
+	}
+
+	if (p->algorithm == WIMLIB_LZX_ALGORITHM_FAST)
+		return true;	/* No further fields are checked.  */
+	if (p->algorithm != WIMLIB_LZX_ALGORITHM_SLOW) {
+		LZX_DEBUG("Invalid algorithm.");
+		return false;
+	}
+
+	/* Slow algorithm: explicit tunables matter only without defaults.  */
+	if (p->use_defaults)
+		return true;
+
+	if (p->alg_params.slow.num_optim_passes < 1) {
+		LZX_DEBUG("Invalid number of optimization passes!");
+		return false;
+	}
+
+	if (!(p->alg_params.slow.main_nostat_cost >= 1 &&
+	      p->alg_params.slow.main_nostat_cost <= 16)) {
+		LZX_DEBUG("Invalid main_nostat_cost!");
+		return false;
+	}
+
+	if (!(p->alg_params.slow.len_nostat_cost >= 1 &&
+	      p->alg_params.slow.len_nostat_cost <= 16)) {
+		LZX_DEBUG("Invalid len_nostat_cost!");
+		return false;
+	}
+
+	if (!(p->alg_params.slow.aligned_nostat_cost >= 1 &&
+	      p->alg_params.slow.aligned_nostat_cost <= 8)) {
+		LZX_DEBUG("Invalid aligned_nostat_cost!");
+		return false;
+	}
+	return true;
+}
+
 const struct compressor_ops lzx_compressor_ops = {
+	.params_valid	    = lzx_params_valid,
+	.get_needed_memory  = lzx_get_needed_memory,
 	.create_compressor  = lzx_create_compressor,
 	.compress	    = lzx_compress,
 	.free_compressor    = lzx_free_compressor,