X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Flz77.c;h=b5495da74d1599cd59e12cc8ef65c495550eba55;hb=cbd31ea4e13a4e7479b2a19dc59efd9f2dd86e5e;hp=b9a40174393460e36491ecd97428feeb6795abb9;hpb=4757f17833c96b8c83a7e17cbc6f374c449d60db;p=wimlib diff --git a/src/lz77.c b/src/lz77.c index b9a40174..b5495da7 100644 --- a/src/lz77.c +++ b/src/lz77.c @@ -30,24 +30,15 @@ # include #endif -#include "wimlib/compress.h" +#include "wimlib/compress_common.h" #include "wimlib/util.h" #include -#define LZ_MIN_MATCH 3 - #define HASH_BITS 15 #define HASH_SIZE (1 << HASH_BITS) #define HASH_MASK (HASH_SIZE - 1) - -#if LZ_MIN_MATCH == 2 -# define HASH_SHIFT 8 -#elif LZ_MIN_MATCH == 3 -# define HASH_SHIFT 5 -#else -#error "Invalid LZ_MIN_MATCH" -#endif +#define HASH_SHIFT 5 /* Hash function, based on code from zlib. This function will update and return * the hash value @hash for the string ending on the additional input character @@ -82,7 +73,7 @@ insert_string(input_idx_t hash_tab[], input_idx_t prev_tab[], const u8 window[], unsigned str_pos, unsigned hash) { - hash = update_hash(hash, window[str_pos + LZ_MIN_MATCH - 1]); + hash = update_hash(hash, window[str_pos + 2]); prev_tab[str_pos] = hash_tab[hash]; hash_tab[hash] = str_pos; return hash; @@ -95,18 +86,21 @@ insert_string(input_idx_t hash_tab[], input_idx_t prev_tab[], * @window: The window of uncompressed data. * @bytes_remaining: The number of bytes remaining in the window. * @strstart: The index of the start of the string in the window that - * we are trying to find a match for. + * we are trying to find a match for. * @prev_tab: The array of prev pointers for the hash table. * @cur_match: The index of the head of the hash chain for matches - * having the hash value of the string beginning - * at index @strstart. + * having the hash value of the string beginning + * at index @strstart. * @prev_len: The length of the match that was found for the string - * beginning at (@strstart - 1). + * beginning at (@strstart - 1). * @match_start_ret: A location into which the index of the start of the - * match will be returned. + * match will be returned. * @params: Parameters that affect how long the search will proceed - * before going with the best that has been found - * so far. + * before going with the best that has been found + * so far. + * @min_start_pos: If the chain reaches a match starting before this + * position (including the end-of-chain 0), the search will + * be terminated. * * Returns the length of the match that was found. */ @@ -115,7 +109,8 @@ longest_match(const u8 window[], unsigned bytes_remaining, unsigned strstart, const input_idx_t prev_tab[], unsigned cur_match, unsigned prev_len, unsigned *match_start_ret, - const struct lz_params *params) + const struct lz_params *params, + unsigned min_start_pos) { unsigned chain_len = params->max_chain_len; @@ -146,9 +141,8 @@ longest_match(const u8 window[], unsigned bytes_remaining, * performance reasons. Therefore uninitialized memory will be * accessed, and conditional jumps will be made that depend on * those values. However the length of the match is limited to - * the lookahead, so the output of deflate is not affected by - * the uninitialized values. - */ + * the lookahead, so the output of lz_analyze_block() is not + * affected by the uninitialized values. */ if (match[best_len] != scan_end || match[best_len - 1] != scan_end1 @@ -182,7 +176,7 @@ longest_match(const u8 window[], unsigned bytes_remaining, scan_end1 = scan[best_len - 1]; scan_end = scan[best_len]; } - } while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) != 0); + } while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) >= min_start_pos); *match_start_ret = match_start; return min(min(best_len, bytes_remaining), params->max_match); } @@ -199,16 +193,18 @@ longest_match(const u8 window[], unsigned bytes_remaining, * @record_literal: Consumer for literals. * @record_ctx: Context passed to @record_match and @record_literal. * @params: Structure that contains parameters that affect how the - * analysis proceeds (mainly how good the matches - * have to be). + * analysis proceeds (mainly how good the matches + * have to be). + * @prev_tab: Temporary space containing least @window_size elements. */ void -lz_analyze_block(const u8 window[], +lz_analyze_block(const u8 window[restrict], input_idx_t window_size, lz_record_match_t record_match, lz_record_literal_t record_literal, void *record_ctx, - const struct lz_params *params) + const struct lz_params *params, + input_idx_t prev_tab[restrict]) { unsigned cur_input_pos = 0; unsigned hash = 0; @@ -219,7 +215,7 @@ lz_analyze_block(const u8 window[], unsigned match_start = 0; bool match_available = false; input_idx_t hash_tab[HASH_SIZE]; - input_idx_t prev_tab[window_size]; + unsigned min_start_pos = 1; ZERO_ARRAY(hash_tab); @@ -245,7 +241,14 @@ lz_analyze_block(const u8 window[], prev_start = match_start; match_len = params->min_match - 1; - if (hash_head != 0 && prev_len < params->max_lazy_match) { + if (cur_input_pos > params->max_offset) + min_start_pos = cur_input_pos - params->max_offset; + else + min_start_pos = 1; + + if (hash_head >= min_start_pos && + prev_len < params->max_lazy_match) + { /* To simplify the code, we prevent matches with the * string of window index 0 (in particular we have to * avoid a match of the string with itself at the start @@ -254,7 +257,8 @@ lz_analyze_block(const u8 window[], window_size - cur_input_pos, cur_input_pos, prev_tab, hash_head, prev_len, - &match_start, params); + &match_start, params, + min_start_pos); if (match_len == params->min_match && cur_input_pos - match_start > params->too_far) @@ -278,21 +282,17 @@ lz_analyze_block(const u8 window[], /* Do not insert strings in hash table beyond this. */ unsigned max_insert = window_size - params->min_match; -#if LZ_MIN_MATCH == 2 - if (prev_len >= 3) -#endif - { - prev_len -= 2; - - do { - if (++cur_input_pos <= max_insert) { - hash = insert_string(hash_tab, prev_tab, - window, - cur_input_pos, - hash); - } - } while (--prev_len != 0); - } + + prev_len -= 2; + + do { + if (++cur_input_pos <= max_insert) { + hash = insert_string(hash_tab, prev_tab, + window, + cur_input_pos, + hash); + } + } while (--prev_len != 0); match_available = false; match_len = params->min_match - 1; } else if (match_available) {