X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flz77.c;h=887b874506857251b2bda6c92809c3d008777222;hp=b9a40174393460e36491ecd97428feeb6795abb9;hb=49a63aa13cdeb4c1348697ccd92207a1a65ec7b0;hpb=4757f17833c96b8c83a7e17cbc6f374c449d60db

diff --git a/src/lz77.c b/src/lz77.c
index b9a40174..887b8745 100644
--- a/src/lz77.c
+++ b/src/lz77.c
@@ -107,6 +107,9 @@ insert_string(input_idx_t hash_tab[], input_idx_t prev_tab[],
  * @params:		Parameters that affect how long the search will proceed
  * 				before going with the best that has been found
  * 				so far.
+ * @min_start_pos:	If the chain reaches a match starting before this
+ *			position (including the end-of-chain 0), the search will
+ *			be terminated.
  *
  * Returns the length of the match that was found.
  */
@@ -115,7 +118,8 @@ longest_match(const u8 window[], unsigned bytes_remaining,
 	      unsigned strstart, const input_idx_t prev_tab[],
 	      unsigned cur_match, unsigned prev_len,
 	      unsigned *match_start_ret,
-	      const struct lz_params *params)
+	      const struct lz_params *params,
+	      unsigned min_start_pos)
 {
 	unsigned chain_len = params->max_chain_len;
 
@@ -146,9 +150,8 @@ longest_match(const u8 window[], unsigned bytes_remaining,
 		 * performance reasons.  Therefore uninitialized memory will be
 		 * accessed, and conditional jumps will be made that depend on
 		 * those values.  However the length of the match is limited to
-		 * the lookahead, so the output of deflate is not affected by
-		 * the uninitialized values.
-		 */
+		 * the lookahead, so the output of lz_analyze_block() is not
+		 * affected by the uninitialized values.  */
 
 		if (match[best_len] != scan_end
 		    || match[best_len - 1] != scan_end1
@@ -182,7 +185,7 @@ longest_match(const u8 window[], unsigned bytes_remaining,
 			scan_end1  = scan[best_len - 1];
 			scan_end   = scan[best_len];
 		}
-	} while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) != 0);
+	} while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) >= min_start_pos);
 	*match_start_ret = match_start;
 	return min(min(best_len, bytes_remaining), params->max_match);
 }
@@ -201,6 +204,7 @@ longest_match(const u8 window[], unsigned bytes_remaining,
  * @params:		Structure that contains parameters that affect how the
  * 				analysis proceeds (mainly how good the matches
  * 				have to be).
+ * @prev_tab:		Temporary space containing least @window_size elements.
  */
 void
 lz_analyze_block(const u8 window[],
@@ -208,7 +212,8 @@ lz_analyze_block(const u8 window[],
 		 lz_record_match_t record_match,
 		 lz_record_literal_t record_literal,
 		 void *record_ctx,
-		 const struct lz_params *params)
+		 const struct lz_params *params,
+		 input_idx_t prev_tab[])
 {
 	unsigned cur_input_pos = 0;
 	unsigned hash          = 0;
@@ -219,7 +224,7 @@ lz_analyze_block(const u8 window[],
 	unsigned match_start   = 0;
 	bool match_available = false;
 	input_idx_t hash_tab[HASH_SIZE];
-	input_idx_t prev_tab[window_size];
+	unsigned min_start_pos = 1;
 
 	ZERO_ARRAY(hash_tab);
 
@@ -245,7 +250,14 @@ lz_analyze_block(const u8 window[],
 		prev_start = match_start;
 		match_len = params->min_match - 1;
 
-		if (hash_head != 0 && prev_len < params->max_lazy_match) {
+		if (cur_input_pos > params->max_offset)
+			min_start_pos = cur_input_pos - params->max_offset;
+		else
+			min_start_pos = 1;
+
+		if (hash_head >= min_start_pos &&
+		    prev_len < params->max_lazy_match)
+		{
 			/* To simplify the code, we prevent matches with the
 			 * string of window index 0 (in particular we have to
 			 * avoid a match of the string with itself at the start
@@ -254,7 +266,8 @@ lz_analyze_block(const u8 window[],
 						  window_size - cur_input_pos,
 						  cur_input_pos, prev_tab,
 						  hash_head, prev_len,
-						  &match_start, params);
+						  &match_start, params,
+						  min_start_pos);
 
 			if (match_len == params->min_match &&
 			     cur_input_pos - match_start > params->too_far)