# include <config.h>
#endif
-#include "wimlib/compress.h"
+#include "wimlib/compress_common.h"
#include "wimlib/util.h"
#include <string.h>
-#define LZ_MIN_MATCH 3
-
#define HASH_BITS 15
#define HASH_SIZE (1 << HASH_BITS)
#define HASH_MASK (HASH_SIZE - 1)
-
-#if LZ_MIN_MATCH == 2
-# define HASH_SHIFT 8
-#elif LZ_MIN_MATCH == 3
-# define HASH_SHIFT 5
-#else
-#error "Invalid LZ_MIN_MATCH"
-#endif
+#define HASH_SHIFT 5
/* Hash function, based on code from zlib. This function will update and return
* the hash value @hash for the string ending on the additional input character
const u8 window[], unsigned str_pos,
unsigned hash)
{
- hash = update_hash(hash, window[str_pos + LZ_MIN_MATCH - 1]);
+ hash = update_hash(hash, window[str_pos + 2]);
prev_tab[str_pos] = hash_tab[hash];
hash_tab[hash] = str_pos;
return hash;
* @window: The window of uncompressed data.
* @bytes_remaining: The number of bytes remaining in the window.
* @strstart: The index of the start of the string in the window that
- * we are trying to find a match for.
+ * we are trying to find a match for.
* @prev_tab: The array of prev pointers for the hash table.
* @cur_match: The index of the head of the hash chain for matches
- * having the hash value of the string beginning
- * at index @strstart.
+ * having the hash value of the string beginning
+ * at index @strstart.
* @prev_len: The length of the match that was found for the string
- * beginning at (@strstart - 1).
+ * beginning at (@strstart - 1).
* @match_start_ret: A location into which the index of the start of the
- * match will be returned.
+ * match will be returned.
* @params: Parameters that affect how long the search will proceed
- * before going with the best that has been found
- * so far.
+ * before going with the best that has been found
+ * so far.
+ * @min_start_pos: If the chain reaches a match starting before this
+ * position (including the end-of-chain 0), the search will
+ * be terminated.
*
* Returns the length of the match that was found.
*/
unsigned strstart, const input_idx_t prev_tab[],
unsigned cur_match, unsigned prev_len,
unsigned *match_start_ret,
- const struct lz_params *params)
+ const struct lz_params *params,
+ unsigned min_start_pos)
{
unsigned chain_len = params->max_chain_len;
* performance reasons. Therefore uninitialized memory will be
* accessed, and conditional jumps will be made that depend on
* those values. However the length of the match is limited to
- * the lookahead, so the output of deflate is not affected by
- * the uninitialized values.
- */
+ * the lookahead, so the output of lz_analyze_block() is not
+ * affected by the uninitialized values. */
if (match[best_len] != scan_end
|| match[best_len - 1] != scan_end1
scan_end1 = scan[best_len - 1];
scan_end = scan[best_len];
}
- } while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) != 0);
+ } while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) >= min_start_pos);
*match_start_ret = match_start;
return min(min(best_len, bytes_remaining), params->max_match);
}
* @record_literal: Consumer for literals.
* @record_ctx: Context passed to @record_match and @record_literal.
* @params: Structure that contains parameters that affect how the
- * analysis proceeds (mainly how good the matches
- * have to be).
+ * analysis proceeds (mainly how good the matches
+ * have to be).
+ * @prev_tab: Temporary space containing least @window_size elements.
*/
void
-lz_analyze_block(const u8 window[],
+lz_analyze_block(const u8 window[restrict],
input_idx_t window_size,
lz_record_match_t record_match,
lz_record_literal_t record_literal,
void *record_ctx,
- const struct lz_params *params)
+ const struct lz_params *params,
+ input_idx_t prev_tab[restrict])
{
unsigned cur_input_pos = 0;
unsigned hash = 0;
unsigned match_start = 0;
bool match_available = false;
input_idx_t hash_tab[HASH_SIZE];
- input_idx_t prev_tab[window_size];
+ unsigned min_start_pos = 1;
ZERO_ARRAY(hash_tab);
prev_start = match_start;
match_len = params->min_match - 1;
- if (hash_head != 0 && prev_len < params->max_lazy_match) {
+ if (cur_input_pos > params->max_offset)
+ min_start_pos = cur_input_pos - params->max_offset;
+ else
+ min_start_pos = 1;
+
+ if (hash_head >= min_start_pos &&
+ prev_len < params->max_lazy_match)
+ {
/* To simplify the code, we prevent matches with the
* string of window index 0 (in particular we have to
* avoid a match of the string with itself at the start
window_size - cur_input_pos,
cur_input_pos, prev_tab,
hash_head, prev_len,
- &match_start, params);
+ &match_start, params,
+ min_start_pos);
if (match_len == params->min_match &&
cur_input_pos - match_start > params->too_far)
/* Do not insert strings in hash table beyond this. */
unsigned max_insert = window_size - params->min_match;
-#if LZ_MIN_MATCH == 2
- if (prev_len >= 3)
-#endif
- {
- prev_len -= 2;
-
- do {
- if (++cur_input_pos <= max_insert) {
- hash = insert_string(hash_tab, prev_tab,
- window,
- cur_input_pos,
- hash);
- }
- } while (--prev_len != 0);
- }
+
+ prev_len -= 2;
+
+ do {
+ if (++cur_input_pos <= max_insert) {
+ hash = insert_string(hash_tab, prev_tab,
+ window,
+ cur_input_pos,
+ hash);
+ }
+ } while (--prev_len != 0);
match_available = false;
match_len = params->min_match - 1;
} else if (match_available) {