#include <string.h>
-#define LZ_MIN_MATCH 3
-
#define HASH_BITS 15
#define HASH_SIZE (1 << HASH_BITS)
#define HASH_MASK (HASH_SIZE - 1)
-
-#if LZ_MIN_MATCH == 2
-# define HASH_SHIFT 8
-#elif LZ_MIN_MATCH == 3
-# define HASH_SHIFT 5
-#else
-#error "Invalid LZ_MIN_MATCH"
-#endif
+#define HASH_SHIFT 5
/* Hash function, based on code from zlib. This function will update and return
* the hash value @hash for the string ending on the additional input character
const u8 window[], unsigned str_pos,
unsigned hash)
{
- hash = update_hash(hash, window[str_pos + LZ_MIN_MATCH - 1]);
+ hash = update_hash(hash, window[str_pos + 2]);
prev_tab[str_pos] = hash_tab[hash];
hash_tab[hash] = str_pos;
return hash;
* @params: Parameters that affect how long the search will proceed
* before going with the best that has been found
* so far.
+ * @min_start_pos: If the chain reaches a match starting before this
+ * position (including the end-of-chain 0), the search will
+ * be terminated.
*
* Returns the length of the match that was found.
*/
unsigned strstart, const input_idx_t prev_tab[],
unsigned cur_match, unsigned prev_len,
unsigned *match_start_ret,
- const struct lz_params *params)
+ const struct lz_params *params,
+ unsigned min_start_pos)
{
unsigned chain_len = params->max_chain_len;
* performance reasons. Therefore uninitialized memory will be
* accessed, and conditional jumps will be made that depend on
* those values. However the length of the match is limited to
- * the lookahead, so the output of deflate is not affected by
- * the uninitialized values.
- */
+ * the lookahead, so the output of lz_analyze_block() is not
+ * affected by the uninitialized values. */
if (match[best_len] != scan_end
|| match[best_len - 1] != scan_end1
scan_end1 = scan[best_len - 1];
scan_end = scan[best_len];
}
- } while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) != 0);
+ } while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) >= min_start_pos);
*match_start_ret = match_start;
return min(min(best_len, bytes_remaining), params->max_match);
}
* @params: Structure that contains parameters that affect how the
* analysis proceeds (mainly how good the matches
* have to be).
+ * @prev_tab: Temporary space containing least @window_size elements.
*/
void
lz_analyze_block(const u8 window[],
lz_record_match_t record_match,
lz_record_literal_t record_literal,
void *record_ctx,
- const struct lz_params *params)
+ const struct lz_params *params,
+ input_idx_t prev_tab[])
{
unsigned cur_input_pos = 0;
unsigned hash = 0;
unsigned match_start = 0;
bool match_available = false;
input_idx_t hash_tab[HASH_SIZE];
- input_idx_t prev_tab[window_size];
+ unsigned min_start_pos = 1;
ZERO_ARRAY(hash_tab);
prev_start = match_start;
match_len = params->min_match - 1;
- if (hash_head != 0 && prev_len < params->max_lazy_match) {
+ if (cur_input_pos > params->max_offset)
+ min_start_pos = cur_input_pos - params->max_offset;
+ else
+ min_start_pos = 1;
+
+ if (hash_head >= min_start_pos &&
+ prev_len < params->max_lazy_match)
+ {
/* To simplify the code, we prevent matches with the
* string of window index 0 (in particular we have to
* avoid a match of the string with itself at the start
window_size - cur_input_pos,
cur_input_pos, prev_tab,
hash_head, prev_len,
- &match_start, params);
+ &match_start, params,
+ min_start_pos);
if (match_len == params->min_match &&
cur_input_pos - match_start > params->too_far)
/* Do not insert strings in hash table beyond this. */
unsigned max_insert = window_size - params->min_match;
-#if LZ_MIN_MATCH == 2
- if (prev_len >= 3)
-#endif
- {
- prev_len -= 2;
-
- do {
- if (++cur_input_pos <= max_insert) {
- hash = insert_string(hash_tab, prev_tab,
- window,
- cur_input_pos,
- hash);
- }
- } while (--prev_len != 0);
- }
+
+ prev_len -= 2;
+
+ do {
+ if (++cur_input_pos <= max_insert) {
+ hash = insert_string(hash_tab, prev_tab,
+ window,
+ cur_input_pos,
+ hash);
+ }
+ } while (--prev_len != 0);
match_available = false;
match_len = params->min_match - 1;
} else if (match_available) {