X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzx-compress.c;h=cdbb63699004edbed7066f9344f6d1c6f30207d2;hp=aedd05a86e8a64d78430f781f79a045998fce8fb;hb=10be6c712f4f5f066458dbab11e81f8e64cd2324;hpb=14a1547f3404346127a82e643809bc837425ec8e diff --git a/src/lzx-compress.c b/src/lzx-compress.c index aedd05a8..cdbb6369 100644 --- a/src/lzx-compress.c +++ b/src/lzx-compress.c @@ -254,7 +254,7 @@ #define LZX_CACHE_PER_POS 8 #define LZX_CACHE_LEN (LZX_DIV_BLOCK_SIZE * (LZX_CACHE_PER_POS + 1)) -#define LZX_CACHE_SIZE (LZX_CACHE_LEN * sizeof(struct raw_match)) +#define LZX_CACHE_SIZE (LZX_CACHE_LEN * sizeof(struct lz_match)) #define LZX_MAX_MATCHES_PER_POS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1) /* Codewords for the LZX main, length, and aligned offset Huffman codes */ @@ -279,7 +279,7 @@ struct lzx_lens { * * If a codeword has zero frequency, it must still be assigned some nonzero cost * --- generally a high cost, since even if it gets used in the next iteration, - * it probably will not be used very times. */ + * it probably will not be used very many times. */ struct lzx_costs { u8 main[LZX_MAINCODE_MAX_NUM_SYMBOLS]; u8 len[LZX_LENCODE_NUM_SYMBOLS]; @@ -294,13 +294,13 @@ struct lzx_codes { /* Tables for tallying symbol frequencies in the three LZX alphabets */ struct lzx_freqs { - input_idx_t main[LZX_MAINCODE_MAX_NUM_SYMBOLS]; - input_idx_t len[LZX_LENCODE_NUM_SYMBOLS]; - input_idx_t aligned[LZX_ALIGNEDCODE_NUM_SYMBOLS]; + u32 main[LZX_MAINCODE_MAX_NUM_SYMBOLS]; + u32 len[LZX_LENCODE_NUM_SYMBOLS]; + u32 aligned[LZX_ALIGNEDCODE_NUM_SYMBOLS]; }; /* LZX intermediate match/literal format */ -struct lzx_match { +struct lzx_item { /* Bit Description * * 31 1 if a match, 0 if a literal. @@ -325,16 +325,16 @@ struct lzx_block_spec { int block_type; /* 0-based position in the window at which this block starts. */ - input_idx_t window_pos; + u32 window_pos; /* The number of bytes of uncompressed data this block represents. */ - input_idx_t block_size; + u32 block_size; /* The match/literal sequence for this block. */ - struct lzx_match *chosen_matches; + struct lzx_item *chosen_items; - /* The length of the @chosen_matches sequence. */ - input_idx_t num_chosen_matches; + /* The length of the @chosen_items sequence. */ + u32 num_chosen_items; /* Huffman codes for this block. */ struct lzx_codes codes; @@ -363,10 +363,10 @@ struct lzx_compressor { /* Number of bytes of data to be compressed, which is the number of * bytes of data in @window that are actually valid. */ - input_idx_t window_size; + u32 window_size; /* Allocated size of the @window. */ - input_idx_t max_window_size; + u32 max_window_size; /* Number of symbols in the main alphabet (depends on the * @max_window_size since it determines the maximum allowed offset). */ @@ -377,7 +377,7 @@ struct lzx_compressor { /* Space for the sequences of matches/literals that were chosen for each * block. */ - struct lzx_match *chosen_matches; + struct lzx_item *chosen_items; /* Information about the LZX blocks the preprocessed input was divided * into. */ @@ -397,27 +397,27 @@ struct lzx_compressor { struct lzx_costs costs; /* Fast algorithm only: Array of hash table links. */ - input_idx_t *prev_tab; + u32 *prev_tab; /* Slow algorithm only: Binary tree match-finder. */ struct lz_bt mf; /* Position in window of next match to return. */ - input_idx_t match_window_pos; + u32 match_window_pos; /* The end-of-block position. We can't allow any matches to span this * position. 
*/ - input_idx_t match_window_end; + u32 match_window_end; /* Matches found by the match-finder are cached in the following array * to achieve a slight speedup when the same matches are needed on * subsequent passes. This is suboptimal because different matches may * be preferred with different cost models, but seems to be a worthwhile * speedup. */ - struct raw_match *cached_matches; - struct raw_match *cache_ptr; + struct lz_match *cached_matches; + struct lz_match *cache_ptr; bool matches_cached; - struct raw_match *cache_limit; + struct lz_match *cache_limit; /* Match-chooser state. * When matches have been chosen, optimum_cur_idx is set to the position @@ -453,22 +453,22 @@ struct lzx_mc_pos_data { /* Position of the start of the match or literal that * was taken to get to this position in the approximate * minimum-cost parse. */ - input_idx_t link; + u32 link; /* Offset (as in an LZ (length, offset) pair) of the * match or literal that was taken to get to this * position in the approximate minimum-cost parse. */ - input_idx_t match_offset; + u32 match_offset; } prev; struct { /* Position at which the match or literal starting at * this position ends in the minimum-cost parse. */ - input_idx_t link; + u32 link; /* Offset (as in an LZ (length, offset) pair) of the * match or literal starting at this position in the * approximate minimum-cost parse. */ - input_idx_t match_offset; + u32 match_offset; } next; }; @@ -557,7 +557,7 @@ lzx_make_huffman_codes(const struct lzx_freqs *freqs, */ static void lzx_write_match(struct output_bitstream *out, int block_type, - struct lzx_match match, const struct lzx_codes *codes) + struct lzx_item match, const struct lzx_codes *codes) { /* low 8 bits are the match length minus 2 */ unsigned match_len_minus_2 = match.data & 0xff; @@ -643,7 +643,7 @@ static unsigned lzx_build_precode(const u8 lens[restrict], const u8 prev_lens[restrict], const unsigned num_syms, - input_idx_t precode_freqs[restrict LZX_PRECODE_NUM_SYMBOLS], + u32 precode_freqs[restrict LZX_PRECODE_NUM_SYMBOLS], u8 output_syms[restrict num_syms], u8 precode_lens[restrict LZX_PRECODE_NUM_SYMBOLS], u32 precode_codewords[restrict LZX_PRECODE_NUM_SYMBOLS], @@ -812,7 +812,7 @@ lzx_write_compressed_code(struct output_bitstream *out, const u8 prev_lens[restrict], unsigned num_syms) { - input_idx_t precode_freqs[LZX_PRECODE_NUM_SYMBOLS]; + u32 precode_freqs[LZX_PRECODE_NUM_SYMBOLS]; u8 output_syms[num_syms]; u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS]; u32 precode_codewords[LZX_PRECODE_NUM_SYMBOLS]; @@ -883,12 +883,12 @@ lzx_write_compressed_code(struct output_bitstream *out, static void lzx_write_matches_and_literals(struct output_bitstream *ostream, int block_type, - const struct lzx_match match_tab[], + const struct lzx_item match_tab[], unsigned match_count, const struct lzx_codes *codes) { for (unsigned i = 0; i < match_count; i++) { - struct lzx_match match = match_tab[i]; + struct lzx_item match = match_tab[i]; /* The high bit of the 32-bit intermediate representation * indicates whether the item is an actual LZ-style match (1) or @@ -943,8 +943,8 @@ lzx_write_compressed_block(int block_type, unsigned block_size, unsigned max_window_size, unsigned num_main_syms, - struct lzx_match * chosen_matches, - unsigned num_chosen_matches, + struct lzx_item * chosen_items, + unsigned num_chosen_items, const struct lzx_codes * codes, const struct lzx_codes * prev_codes, struct output_bitstream * ostream) @@ -1022,7 +1022,7 @@ lzx_write_compressed_block(int block_type, /* Write the actual matches and 
literals. */ lzx_write_matches_and_literals(ostream, block_type, - chosen_matches, num_chosen_matches, + chosen_items, num_chosen_items, codes); LZX_DEBUG("Done writing block."); @@ -1037,17 +1037,17 @@ lzx_write_all_blocks(struct lzx_compressor *ctx, struct output_bitstream *ostrea for (unsigned i = 0; i < ctx->num_blocks; i++) { const struct lzx_block_spec *spec = &ctx->block_specs[i]; - LZX_DEBUG("Writing block %u/%u (type=%d, size=%u, num_chosen_matches=%u)...", + LZX_DEBUG("Writing block %u/%u (type=%d, size=%u, num_chosen_items=%u)...", i + 1, ctx->num_blocks, spec->block_type, spec->block_size, - spec->num_chosen_matches); + spec->num_chosen_items); lzx_write_compressed_block(spec->block_type, spec->block_size, ctx->max_window_size, ctx->num_main_syms, - spec->chosen_matches, - spec->num_chosen_matches, + spec->chosen_items, + spec->num_chosen_items, &spec->codes, prev_codes, ostream); @@ -1058,7 +1058,7 @@ lzx_write_all_blocks(struct lzx_compressor *ctx, struct output_bitstream *ostrea /* Constructs an LZX match from a literal byte and updates the main code symbol * frequencies. */ -static u32 +static inline u32 lzx_tally_literal(u8 lit, struct lzx_freqs *freqs) { freqs->main[lit]++; @@ -1069,7 +1069,7 @@ lzx_tally_literal(u8 lit, struct lzx_freqs *freqs) * queue and the frequency of symbols in the main, length, and aligned offset * alphabets. The return value is a 32-bit number that provides the match in an * intermediate representation documented below. */ -static u32 +static inline u32 lzx_tally_match(unsigned match_len, u32 match_offset, struct lzx_freqs *freqs, struct lzx_lru_queue *queue) { @@ -1121,7 +1121,7 @@ lzx_tally_match(unsigned match_len, u32 match_offset, freqs->aligned[position_footer & 7]++; /* Pack the position slot, position footer, and match length into an - * intermediate representation. See `struct lzx_match' for details. + * intermediate representation. See `struct lzx_item' for details. */ LZX_ASSERT(LZX_MAX_POSITION_SLOTS <= 64); LZX_ASSERT(lzx_get_num_extra_bits(LZX_MAX_POSITION_SLOTS - 1) <= 17); @@ -1139,7 +1139,7 @@ lzx_tally_match(unsigned match_len, u32 match_offset, struct lzx_record_ctx { struct lzx_freqs freqs; struct lzx_lru_queue queue; - struct lzx_match *matches; + struct lzx_item *matches; }; static void @@ -1248,10 +1248,10 @@ lzx_set_costs(struct lzx_compressor * ctx, const struct lzx_lens * lens) * value is the number of matches found. */ static unsigned lzx_get_matches(struct lzx_compressor *ctx, - const struct raw_match **matches_ret) + const struct lz_match **matches_ret) { - struct raw_match *cache_ptr; - struct raw_match *matches; + struct lz_match *cache_ptr; + struct lz_match *matches; unsigned num_matches; LZX_ASSERT(ctx->match_window_pos < ctx->match_window_end); @@ -1324,7 +1324,7 @@ lzx_get_matches(struct lzx_compressor *ctx, static void lzx_skip_bytes(struct lzx_compressor *ctx, unsigned n) { - struct raw_match *cache_ptr; + struct lz_match *cache_ptr; LZX_ASSERT(n <= ctx->match_window_end - ctx->match_window_pos); @@ -1349,7 +1349,7 @@ lzx_skip_bytes(struct lzx_compressor *ctx, unsigned n) * * Returns the first match in the list. 
*/ -static struct raw_match +static struct lz_match lzx_match_chooser_reverse_list(struct lzx_compressor *ctx, unsigned cur_pos) { unsigned prev_link, saved_prev_link; @@ -1375,7 +1375,7 @@ lzx_match_chooser_reverse_list(struct lzx_compressor *ctx, unsigned cur_pos) ctx->optimum_cur_idx = ctx->optimum[0].next.link; - return (struct raw_match) + return (struct lz_match) { .len = ctx->optimum_cur_idx, .offset = ctx->optimum[0].next.match_offset, }; @@ -1443,12 +1443,12 @@ lzx_match_chooser_reverse_list(struct lzx_compressor *ctx, unsigned cur_pos) * The return value is a (length, offset) pair specifying the match or literal * chosen. For literals, the length is 0 or 1 and the offset is meaningless. */ -static struct raw_match +static struct lz_match lzx_get_near_optimal_match(struct lzx_compressor *ctx) { unsigned num_matches; - const struct raw_match *matches; - struct raw_match match; + const struct lz_match *matches; + struct lz_match match; unsigned longest_len; unsigned longest_rep_len; u32 longest_rep_offset; @@ -1493,7 +1493,7 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx) /* If there's a long match with a recent offset, take it. */ if (longest_rep_len >= ctx->params.alg_params.slow.nice_match_length) { lzx_skip_bytes(ctx, longest_rep_len); - return (struct raw_match) { + return (struct lz_match) { .len = longest_rep_len, .offset = longest_rep_offset, }; @@ -1857,61 +1857,82 @@ lzx_optimize_block(struct lzx_compressor *ctx, struct lzx_block_spec *spec, const struct lzx_lru_queue orig_queue = ctx->queue; unsigned num_passes_remaining = num_passes; struct lzx_freqs freqs; + const u8 *window_ptr; + const u8 *window_end; + struct lzx_item *next_chosen_match; + struct lz_match lz_match; + struct lzx_item lzx_item; LZX_ASSERT(num_passes >= 1); LZX_ASSERT(lz_bt_get_position(&ctx->mf) == spec->window_pos); ctx->match_window_end = spec->window_pos + spec->block_size; - spec->chosen_matches = &ctx->chosen_matches[spec->window_pos]; ctx->matches_cached = false; /* The first optimal parsing pass is done using the cost model already * set in ctx->costs. Each later pass is done using a cost model - * computed from the previous pass. */ - do { - const u8 *window_ptr; - const u8 *window_end; - struct lzx_match *next_chosen_match; + * computed from the previous pass. + * + * To improve performance we only generate the array containing the + * matches and literals in intermediate form on the final pass. 
*/ - --num_passes_remaining; + while (--num_passes_remaining) { ctx->match_window_pos = spec->window_pos; ctx->cache_ptr = ctx->cached_matches; memset(&freqs, 0, sizeof(freqs)); window_ptr = &ctx->window[spec->window_pos]; window_end = window_ptr + spec->block_size; - next_chosen_match = spec->chosen_matches; while (window_ptr != window_end) { - struct raw_match raw_match; - struct lzx_match lzx_match; - raw_match = lzx_get_near_optimal_match(ctx); + lz_match = lzx_get_near_optimal_match(ctx); - LZX_ASSERT(!(raw_match.len == LZX_MIN_MATCH_LEN && - raw_match.offset == ctx->max_window_size - + LZX_ASSERT(!(lz_match.len == LZX_MIN_MATCH_LEN && + lz_match.offset == ctx->max_window_size - LZX_MIN_MATCH_LEN)); - if (raw_match.len >= LZX_MIN_MATCH_LEN) { - lzx_match.data = lzx_tally_match(raw_match.len, - raw_match.offset, - &freqs, - &ctx->queue); - window_ptr += raw_match.len; + if (lz_match.len >= LZX_MIN_MATCH_LEN) { + lzx_tally_match(lz_match.len, lz_match.offset, + &freqs, &ctx->queue); + window_ptr += lz_match.len; } else { - lzx_match.data = lzx_tally_literal(*window_ptr, - &freqs); + lzx_tally_literal(*window_ptr, &freqs); window_ptr += 1; } - *next_chosen_match++ = lzx_match; } - spec->num_chosen_matches = next_chosen_match - spec->chosen_matches; lzx_make_huffman_codes(&freqs, &spec->codes, ctx->num_main_syms); - if (num_passes_remaining) { - lzx_set_costs(ctx, &spec->codes.lens); - ctx->queue = orig_queue; - ctx->matches_cached = true; - } - } while (num_passes_remaining); + lzx_set_costs(ctx, &spec->codes.lens); + ctx->queue = orig_queue; + ctx->matches_cached = true; + } + ctx->match_window_pos = spec->window_pos; + ctx->cache_ptr = ctx->cached_matches; + memset(&freqs, 0, sizeof(freqs)); + window_ptr = &ctx->window[spec->window_pos]; + window_end = window_ptr + spec->block_size; + + spec->chosen_items = &ctx->chosen_items[spec->window_pos]; + next_chosen_match = spec->chosen_items; + + while (window_ptr != window_end) { + lz_match = lzx_get_near_optimal_match(ctx); + + LZX_ASSERT(!(lz_match.len == LZX_MIN_MATCH_LEN && + lz_match.offset == ctx->max_window_size - + LZX_MIN_MATCH_LEN)); + if (lz_match.len >= LZX_MIN_MATCH_LEN) { + lzx_item.data = lzx_tally_match(lz_match.len, + lz_match.offset, + &freqs, &ctx->queue); + window_ptr += lz_match.len; + } else { + lzx_item.data = lzx_tally_literal(*window_ptr, &freqs); + window_ptr += 1; + } + *next_chosen_match++ = lzx_item; + } + spec->num_chosen_items = next_chosen_match - spec->chosen_items; + lzx_make_huffman_codes(&freqs, &spec->codes, ctx->num_main_syms); spec->block_type = lzx_choose_verbatim_or_aligned(&freqs, &spec->codes); } @@ -1962,7 +1983,7 @@ lzx_prepare_blocks(struct lzx_compressor * ctx) * * ctx->block_specs[] * ctx->num_blocks - * ctx->chosen_matches[] + * ctx->chosen_items[] */ static void lzx_prepare_block_fast(struct lzx_compressor * ctx) @@ -1988,7 +2009,7 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx) /* Initialize symbol frequencies and match offset LRU queue. */ memset(&record_ctx.freqs, 0, sizeof(struct lzx_freqs)); lzx_lru_queue_init(&record_ctx.queue); - record_ctx.matches = ctx->chosen_matches; + record_ctx.matches = ctx->chosen_items; /* Determine series of matches/literals to output. 
*/ lz_analyze_block(ctx->window, @@ -2004,8 +2025,8 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx) spec->block_type = LZX_BLOCKTYPE_ALIGNED; spec->window_pos = 0; spec->block_size = ctx->window_size; - spec->num_chosen_matches = (record_ctx.matches - ctx->chosen_matches); - spec->chosen_matches = ctx->chosen_matches; + spec->num_chosen_items = (record_ctx.matches - ctx->chosen_items); + spec->chosen_items = ctx->chosen_items; lzx_make_huffman_codes(&record_ctx.freqs, &spec->codes, ctx->num_main_syms); ctx->num_blocks = 1; @@ -2065,7 +2086,7 @@ lzx_compress(const void *uncompressed_data, size_t uncompressed_size, LZX_DEBUG("Flushing bitstream..."); compressed_size = flush_output_bitstream(&ostream); - if (compressed_size == ~(input_idx_t)0) { + if (compressed_size == (u32)~0UL) { LZX_DEBUG("Data did not compress to %zu bytes or less!", compressed_size_avail); return 0; @@ -2125,7 +2146,7 @@ lzx_free_compressor(void *_ctx) struct lzx_compressor *ctx = _ctx; if (ctx) { - FREE(ctx->chosen_matches); + FREE(ctx->chosen_items); FREE(ctx->cached_matches); FREE(ctx->optimum); lz_bt_destroy(&ctx->mf); @@ -2199,8 +2220,6 @@ lzx_create_compressor(size_t window_size, if (!lzx_window_size_valid(window_size)) return WIMLIB_ERR_INVALID_PARAM; - LZX_DEBUG("Allocating memory."); - ctx = CALLOC(1, sizeof(struct lzx_compressor)); if (ctx == NULL) goto oom; @@ -2241,7 +2260,7 @@ lzx_create_compressor(size_t window_size, min(params->alg_params.slow.nice_match_length, LZX_MAX_MATCH_LEN)) * sizeof(ctx->optimum[0])); - if (!ctx->optimum) + if (ctx->optimum == NULL) goto oom; } @@ -2253,8 +2272,8 @@ lzx_create_compressor(size_t window_size, LZX_CACHE_LEN - (LZX_MAX_MATCHES_PER_POS + 1); } - ctx->chosen_matches = MALLOC(window_size * sizeof(ctx->chosen_matches[0])); - if (ctx->chosen_matches == NULL) + ctx->chosen_items = MALLOC(window_size * sizeof(ctx->chosen_items[0])); + if (ctx->chosen_items == NULL) goto oom; memcpy(&ctx->params, params, sizeof(struct wimlib_lzx_compressor_params)); @@ -2286,7 +2305,7 @@ lzx_get_needed_memory(size_t max_block_size, sizeof(((struct lzx_compressor*)0)->block_specs[0]); if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) { - size += max_block_size * sizeof(((struct lzx_compressor*)0)->chosen_matches[0]); + size += max_block_size * sizeof(((struct lzx_compressor*)0)->chosen_items[0]); size += lz_bt_get_needed_memory(max_block_size); size += (LZX_OPTIM_ARRAY_SIZE + min(params->alg_params.slow.nice_match_length,
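
Note on the lzx_optimize_block() restructuring above: the per-block loop is now split so that the first num_passes - 1 iterations only tally symbol frequencies (to refine the cost model), and the intermediate match/literal array is generated on the final pass only. What follows is a minimal, self-contained sketch of that control flow, not code from wimlib: struct item, struct freqs, choose_next_item(), update_costs(), optimize_block_sketch() and main() are hypothetical stand-ins, and the parser is reduced to emitting literals so the sketch compiles and runs on its own.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical stand-ins for struct lzx_item and struct lzx_freqs. */
struct item  { uint32_t data; };
struct freqs { uint32_t byte[256]; };

/* Consume one item starting at *window_ptr, tally it in @freqs, and return it
 * in intermediate form.  Stands in for lzx_get_near_optimal_match() plus
 * lzx_tally_match()/lzx_tally_literal(); reduced to literals only here so the
 * sketch stays self-contained. */
static struct item
choose_next_item(const uint8_t **window_ptr, struct freqs *freqs)
{
	uint8_t lit = *(*window_ptr)++;

	freqs->byte[lit]++;
	return (struct item){ .data = lit };
}

/* Rebuild the cost model from the tallied frequencies.  Stands in for
 * lzx_make_huffman_codes() + lzx_set_costs(); a no-op in this sketch. */
static void
update_costs(const struct freqs *freqs)
{
	(void)freqs;
}

/* Mirrors the reorganized lzx_optimize_block(): statistics-only passes first,
 * then one final pass that also records the chosen items.  Assumes
 * num_passes >= 1, as the real function asserts. */
static size_t
optimize_block_sketch(const uint8_t *window, size_t block_size,
		      unsigned num_passes, struct item *chosen_items)
{
	const uint8_t *window_end = window + block_size;
	const uint8_t *window_ptr;
	struct item *next_item = chosen_items;
	struct freqs freqs;

	/* Passes 1 .. num_passes - 1: tally frequencies and refine the cost
	 * model, but do not materialize the item array. */
	while (--num_passes) {
		memset(&freqs, 0, sizeof(freqs));
		window_ptr = window;
		while (window_ptr != window_end)
			(void)choose_next_item(&window_ptr, &freqs);
		update_costs(&freqs);
	}

	/* Final pass: tally frequencies *and* record the chosen items. */
	memset(&freqs, 0, sizeof(freqs));
	window_ptr = window;
	while (window_ptr != window_end)
		*next_item++ = choose_next_item(&window_ptr, &freqs);

	return next_item - chosen_items;
}

/* Tiny usage example: two passes over a small buffer; with the literal-only
 * stand-in parser, one item per input byte is recorded on the final pass. */
int main(void)
{
	static const uint8_t data[] = "abracadabra";
	struct item items[sizeof(data)];
	size_t n = optimize_block_sketch(data, sizeof(data), 2, items);

	return n == sizeof(data) ? 0 : 1;
}

The point of the split is that the earlier passes exist only to converge the cost model, so skipping the item stores there avoids redundant work without changing the sequence chosen on the last pass, which is what the comment "To improve performance we only generate the array containing the matches and literals in intermediate form on the final pass" in the patch refers to.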