X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flz77.c;fp=src%2Flz77.c;h=b9a40174393460e36491ecd97428feeb6795abb9;hp=5ce8d89fc9192c6c20b4f4d788bb53a927473be9;hb=4757f17833c96b8c83a7e17cbc6f374c449d60db;hpb=aa0fe426c4b37b29aa70741084ba516da63dd479;ds=sidebyside diff --git a/src/lz77.c b/src/lz77.c index 5ce8d89f..b9a40174 100644 --- a/src/lz77.c +++ b/src/lz77.c @@ -27,7 +27,7 @@ */ #ifdef HAVE_CONFIG_H -# include "config.h" +# include #endif #include "wimlib/compress.h" @@ -78,7 +78,7 @@ update_hash(unsigned hash, u8 c) * indicating the end of the hash chain. */ static inline unsigned -insert_string(u16 hash_tab[], u16 prev_tab[], +insert_string(input_idx_t hash_tab[], input_idx_t prev_tab[], const u8 window[], unsigned str_pos, unsigned hash) { @@ -112,7 +112,7 @@ insert_string(u16 hash_tab[], u16 prev_tab[], */ static unsigned longest_match(const u8 window[], unsigned bytes_remaining, - unsigned strstart, const u16 prev_tab[], + unsigned strstart, const input_idx_t prev_tab[], unsigned cur_match, unsigned prev_len, unsigned *match_start_ret, const struct lz_params *params) @@ -193,44 +193,23 @@ longest_match(const u8 window[], unsigned bytes_remaining, * Determines the sequence of matches and literals that a block of data will be * compressed to. * - * @uncompressed_data: The data that is to be compressed. - * @uncompressed_len: The length of @uncompressed_data, in bytes. - * @match_tab: An array for the intermediate representation of matches. - * @record_match: A function that will be called to produce the - * intermediate representation of a match, given - * the offset and length. This function should also - * update the appropriate symbol frequency counts - * so that any needed Huffman codes can be made - * later. - * @record_literal: A function that will be called to produce the - * intermediate representation of a literal, given - * the character of the literal. This function - * should also update the appropriate symbol - * frequency counts so that any needed Huffman - * codes can be made later. - * @record_match_arg_1: - * @record_match_arg_2: Extra arguments to be passed to @record_match. - * @record_literal_arg: Extra arguments to be passed to @record_literal. + * @window: The data that is to be compressed. + * @window_size: The length of @window, in bytes. + * @record_match: Consumer for matches. + * @record_literal: Consumer for literals. + * @record_ctx: Context passed to @record_match and @record_literal. * @params: Structure that contains parameters that affect how the * analysis proceeds (mainly how good the matches * have to be). - * - * Returns the total number of matches and literal bytes that were found; this - * is the number of slots in @match_tab that have been filled with the - * intermediate representation of a match or literal byte. */ -unsigned -lz_analyze_block(const u8 uncompressed_data[], - unsigned uncompressed_len, - u32 match_tab[], +void +lz_analyze_block(const u8 window[], + input_idx_t window_size, lz_record_match_t record_match, lz_record_literal_t record_literal, - void *record_match_arg1, - void *record_match_arg2, - void *record_literal_arg, + void *record_ctx, const struct lz_params *params) { - unsigned cur_match_pos = 0; unsigned cur_input_pos = 0; unsigned hash = 0; unsigned hash_head = 0; @@ -239,23 +218,21 @@ lz_analyze_block(const u8 uncompressed_data[], unsigned match_len = params->min_match - 1; unsigned match_start = 0; bool match_available = false; - u16 hash_tab[HASH_SIZE]; - u32 match; - u16 prev_tab[uncompressed_len]; + input_idx_t hash_tab[HASH_SIZE]; + input_idx_t prev_tab[window_size]; ZERO_ARRAY(hash_tab); - ZERO_ARRAY(prev_tab); do { /* If there are at least 3 characters remaining in the input, * insert the 3-character string beginning at - * uncompressed_data[cur_input_pos] into the hash table. + * window[cur_input_pos] into the hash table. * * hash_head is set to the index of the previous string in the * hash bucket, or 0 if there is no such string */ - if (uncompressed_len - cur_input_pos >= params->min_match) { + if (window_size - cur_input_pos >= params->min_match) { hash = insert_string(hash_tab, prev_tab, - uncompressed_data, + window, cur_input_pos, hash); hash_head = prev_tab[cur_input_pos]; } else { @@ -273,8 +250,8 @@ lz_analyze_block(const u8 uncompressed_data[], * string of window index 0 (in particular we have to * avoid a match of the string with itself at the start * of the input file). */ - match_len = longest_match(uncompressed_data, - uncompressed_len - cur_input_pos, + match_len = longest_match(window, + window_size - cur_input_pos, cur_input_pos, prev_tab, hash_head, prev_len, &match_start, params); @@ -289,26 +266,18 @@ lz_analyze_block(const u8 uncompressed_data[], */ if (prev_len >= params->min_match && match_len <= prev_len) { - /* Do not insert strings in hash table beyond this. */ - unsigned max_insert = uncompressed_len - params->min_match; - - /*DEBUG("Recording match (pos = %u, offset = %u, len = %u)\n",*/ - /*cur_input_pos - 1, */ - /*cur_input_pos - 1 - prev_start,*/ - /*prev_len);*/ - match = (*record_match)(cur_input_pos - 1 - prev_start, - prev_len, - record_match_arg1, - record_match_arg2); + (*record_match)(prev_len, + cur_input_pos - 1 - prev_start, + record_ctx); - match_tab[cur_match_pos++] = match; + /* Insert in hash table all strings up to the end of the + * match. strstart-1 and strstart are already inserted. + * If there is not enough lookahead, the last two + * strings are not inserted in the hash table. */ - /* Insert in hash table all strings up to the end of the match. - * strstart-1 and strstart are already inserted. If there is not - * enough lookahead, the last two strings are not inserted in - * the hash table. - */ + /* Do not insert strings in hash table beyond this. */ + unsigned max_insert = window_size - params->min_match; #if LZ_MIN_MATCH == 2 if (prev_len >= 3) #endif @@ -318,7 +287,7 @@ lz_analyze_block(const u8 uncompressed_data[], do { if (++cur_input_pos <= max_insert) { hash = insert_string(hash_tab, prev_tab, - uncompressed_data, + window, cur_input_pos, hash); } @@ -327,30 +296,18 @@ lz_analyze_block(const u8 uncompressed_data[], match_available = false; match_len = params->min_match - 1; } else if (match_available) { - /* If there was no match at the previous position, output a - * single literal. If there was a match but the current match - * is longer, truncate the previous match to a single literal. - */ - - /*DEBUG("Recording litrl (pos = %u, value = %u)\n",*/ - /*cur_input_pos - 1, */ - /*uncompressed_data[cur_input_pos - 1]);*/ - - match = (*record_literal)( - uncompressed_data[cur_input_pos - 1], - record_literal_arg); - match_tab[cur_match_pos++] = match; + /* If there was no match at the previous position, + * output a single literal. If there was a match but the + * current match is longer, truncate the previous match + * to a single literal. */ + (*record_literal)(window[cur_input_pos - 1], record_ctx); } else { /* There is no previous match to compare with, wait for * the next step to decide. */ match_available = true; } - } while (++cur_input_pos < uncompressed_len); - - if (match_available) { - match = (*record_literal)(uncompressed_data[cur_input_pos - 1], - record_literal_arg); - match_tab[cur_match_pos++] = match; - } - return cur_match_pos; + } while (++cur_input_pos < window_size); + + if (match_available) + (*record_literal)(window[cur_input_pos - 1], record_ctx); }