X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Flz.c;h=c248acf597b95777821d21cebcf81210ebdba01e;hb=9368708ff94a8da723874c098d24fefcd5866207;hp=94cfc69655f21dcd86bdfb983bcda22efdf7c1dc;hpb=6f77434ea6ff1407603410e28d1edb966c40e568;p=wimlib diff --git a/src/lz.c b/src/lz.c index 94cfc696..c248acf5 100644 --- a/src/lz.c +++ b/src/lz.c @@ -13,26 +13,35 @@ * This file is part of wimlib, a library for working with WIM files. * * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) + * terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) * any later version. * * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * - * You should have received a copy of the GNU Lesser General Public License + * You should have received a copy of the GNU General Public License * along with wimlib; if not, see http://www.gnu.org/licenses/. */ #include "comp.h" #include +#define LZ_MIN_MATCH 3 + #define HASH_BITS 15 #define HASH_SIZE (1 << HASH_BITS) #define HASH_MASK (HASH_SIZE - 1) -#define HASH_SHIFT ((HASH_BITS + 2) / 3) + +#if LZ_MIN_MATCH == 2 +# define HASH_SHIFT 8 +#elif LZ_MIN_MATCH == 3 +# define HASH_SHIFT 5 +#else +#error "Invalid LZ_MIN_MATCH" +#endif /* Hash function, based on code from zlib. This function will update and return * the hash value @hash for the string ending on the additional input character @@ -61,11 +70,10 @@ static inline uint update_hash(uint hash, u8 c) * to walk through the hash chain, until the special index `0' is reached, * indicating the end of the hash chain. */ -static inline uint insert_string(u16 hash_tab[], u16 prev_tab[], - const u8 window[], uint str_pos, - uint hash) +static inline uint insert_string(u16 hash_tab[], u16 prev_tab[], + const u8 window[], uint str_pos, uint hash) { - hash = update_hash(hash, window[str_pos + 2]); + hash = update_hash(hash, window[str_pos + LZ_MIN_MATCH - 1]); prev_tab[str_pos] = hash_tab[hash]; hash_tab[hash] = str_pos; return hash; @@ -80,10 +88,10 @@ static inline uint insert_string(u16 hash_tab[], u16 prev_tab[], * @strstart: The index of the start of the string in the window that * we are trying to find a match for. * @prev_tab: The array of prev pointers for the hash table. - * @cur_match: The index of the head of the hash chain for matches - * having the hash value of the string beginning + * @cur_match: The index of the head of the hash chain for matches + * having the hash value of the string beginning * at index @strstart. - * @prev_len: The length of the match that was found for the string + * @prev_len: The length of the match that was found for the string * beginning at (@strstart - 1). * @match_start_ret: A location into which the index of the start of the * match will be returned. @@ -94,9 +102,9 @@ static inline uint insert_string(u16 hash_tab[], u16 prev_tab[], * Returns the length of the match that was found. */ static uint longest_match(const u8 window[], uint bytes_remaining, - uint strstart, const u16 prev_tab[], - uint cur_match, uint prev_len, - uint *match_start_ret, + uint strstart, const u16 prev_tab[], + uint cur_match, uint prev_len, + uint *match_start_ret, const struct lz_params *params) { uint chain_len = params->max_chain_len; @@ -109,7 +117,7 @@ static uint longest_match(const u8 window[], uint bytes_remaining, uint nice_match = min(params->nice_match, bytes_remaining); - const u8 *strend = scan + params->max_match; + const u8 *strend = scan + min(params->max_match, bytes_remaining); u8 scan_end1 = scan[best_len - 1]; u8 scan_end = scan[best_len]; @@ -122,60 +130,56 @@ static uint longest_match(const u8 window[], uint bytes_remaining, do { match = &window[cur_match]; - - /* Skip to next match if the match length cannot increase - * or if the match length is less than 2. Note that the checks below - * for insufficient lookahead only occur occasionally for performance - * reasons. Therefore uninitialized memory will be accessed, and - * conditional jumps will be made that depend on those values. - * However the length of the match is limited to the lookahead, so - * the output of deflate is not affected by the uninitialized values. + /* Skip to next match if the match length cannot increase or if + * the match length is less than 2. Note that the checks below + * for insufficient lookahead only occur occasionally for + * performance reasons. Therefore uninitialized memory will be + * accessed, and conditional jumps will be made that depend on + * those values. However the length of the match is limited to + * the lookahead, so the output of deflate is not affected by + * the uninitialized values. */ - if (match[best_len] != scan_end || - match[best_len-1] != scan_end1 || - *match != *scan || - *++match != scan[1]) continue; + if (match[best_len] != scan_end + || match[best_len - 1] != scan_end1 + || *match != *scan + || *++match != scan[1]) + continue; + scan++; - /* The check at best_len-1 can be removed because it will be made - * again later. (This heuristic is not always a win.) - * It is not necessary to compare scan[2] and match[2] since they - * are always equal when the other bytes match, given that - * the hash keys are equal and that HASH_BITS >= 8. - */ - scan += 2, match++; - - wimlib_assert(*scan == *match); + #if 0 + do { + } while (scan < strend && *++match == *++scan); + #else - /* We check for insufficient lookahead only every 8th comparison; - * the 256th check will be made at strstart+258. */ do { - } while (*++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - scan < strend); + } while ( + *++match == *++scan && *++match == *++scan && + *++match == *++scan && *++match == *++scan && + *++match == *++scan && *++match == *++scan && + *++match == *++scan && *++match == *++scan && + scan < strend); + #endif + len = match - &window[cur_match]; - len = params->max_match - (int)(strend - scan); - scan = strend - params->max_match; + scan = &window[strstart]; if (len > best_len) { match_start = cur_match; best_len = len; - if (len >= nice_match) + if (len >= nice_match) break; scan_end1 = scan[best_len - 1]; scan_end = scan[best_len]; } - cur_match = prev_tab[cur_match]; - } while (--chain_len != 0 && cur_match != 0); + } while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) != 0); *match_start_ret = match_start; return min(min(best_len, bytes_remaining), params->max_match); } -/* +/* * Determines the sequence of matches and literals that a block of data will be * compressed to. * @@ -191,8 +195,8 @@ static uint longest_match(const u8 window[], uint bytes_remaining, * @record_literal: A function that will be called to produce the * intermediate representation of a literal, given * the character of the literal. This function - * should also update the appropriate symbol - * frequency counts so that any needed Huffman + * should also update the appropriate symbol + * frequency counts so that any needed Huffman * codes can be made later. * @record_match_arg_1: * @record_match_arg_2: Extra arguments to be passed to @record_match. @@ -206,10 +210,10 @@ static uint longest_match(const u8 window[], uint bytes_remaining, * intermediate representation of a match or literal byte. */ uint lz_analyze_block(const u8 uncompressed_data[], uint uncompressed_len, - u32 match_tab[], lz_record_match_t record_match, - lz_record_literal_t record_literal, void *record_match_arg1, - void *record_match_arg2, void *record_literal_arg, - const struct lz_params *params) + u32 match_tab[], lz_record_match_t record_match, + lz_record_literal_t record_literal, void *record_match_arg1, + void *record_match_arg2, void *record_literal_arg, + const struct lz_params *params) { uint cur_match_pos = 0; uint cur_input_pos = 0; @@ -235,9 +239,9 @@ uint lz_analyze_block(const u8 uncompressed_data[], uint uncompressed_len, * hash_head is set to the index of the previous string in the * hash bucket, or 0 if there is no such string */ if (uncompressed_len - cur_input_pos >= params->min_match) { - hash = insert_string(hash_tab, prev_tab, - uncompressed_data, - cur_input_pos, hash); + hash = insert_string(hash_tab, prev_tab, + uncompressed_data, + cur_input_pos, hash); hash_head = prev_tab[cur_input_pos]; } else { hash_head = 0; @@ -249,22 +253,20 @@ uint lz_analyze_block(const u8 uncompressed_data[], uint uncompressed_len, prev_start = match_start; match_len = params->min_match - 1; - if (hash_head != 0 && prev_len < params->min_match) { + if (hash_head != 0 && prev_len < params->max_lazy_match) { /* To simplify the code, we prevent matches with the * string of window index 0 (in particular we have to * avoid a match of the string with itself at the start * of the input file). */ - match_len = longest_match(uncompressed_data, - uncompressed_len - cur_input_pos, - cur_input_pos, prev_tab, - hash_head, prev_len, - &match_start, params); - - if (match_len == params->min_match && - cur_input_pos - match_start > params->too_far) - { + match_len = longest_match(uncompressed_data, + uncompressed_len - cur_input_pos, + cur_input_pos, prev_tab, + hash_head, prev_len, + &match_start, params); + + if (match_len == params->min_match && + cur_input_pos - match_start > params->too_far) match_len = params->min_match - 1; - } } /* If there was a match at the previous step and the current @@ -280,10 +282,10 @@ uint lz_analyze_block(const u8 uncompressed_data[], uint uncompressed_len, /*cur_input_pos - 1 - prev_start,*/ /*prev_len);*/ - match = (*record_match)(cur_input_pos - 1 - prev_start, - prev_len, - record_match_arg1, - record_match_arg2); + match = (*record_match)(cur_input_pos - 1 - prev_start, + prev_len, + record_match_arg1, + record_match_arg2); match_tab[cur_match_pos++] = match; @@ -292,17 +294,21 @@ uint lz_analyze_block(const u8 uncompressed_data[], uint uncompressed_len, * enough lookahead, the last two strings are not inserted in * the hash table. */ - prev_len -= 2; - - do { - if (++cur_input_pos <= max_insert) { - hash = insert_string(hash_tab, prev_tab, - uncompressed_data, - cur_input_pos, - hash); - } - } while (--prev_len != 0); - +#if LZ_MIN_MATCH == 2 + if (prev_len >= 3) +#endif + { + prev_len -= 2; + + do { + if (++cur_input_pos <= max_insert) { + hash = insert_string(hash_tab, prev_tab, + uncompressed_data, + cur_input_pos, + hash); + } + } while (--prev_len != 0); + } match_available = false; match_len = params->min_match - 1; } else if (match_available) {