X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=include%2Fwimlib%2Flz_optimal.h;h=95333db6b273851ec423337486f7e4ea45ac2b84;hb=f303b46312f8d8be4210fba66082d5a7572dbd70;hp=f352ea4738ab9e24cda7bf37e26bc00742c28fd1;hpb=0b655dd071e313311df0333af2522de5bdc264bf;p=wimlib diff --git a/include/wimlib/lz_optimal.h b/include/wimlib/lz_optimal.h index f352ea47..95333db6 100644 --- a/include/wimlib/lz_optimal.h +++ b/include/wimlib/lz_optimal.h @@ -54,6 +54,34 @@ #define LZ_MC_INFINITE_COST (~(lz_mc_cost_t)0) +struct lz_mc_pos_data; + +/* State of the Lempel-Ziv match-chooser. + * + * This is defined here for benefit of the inlined code. It's not intended for + * code outside the match-chooser itself to read or write members from this + * structure. */ +struct lz_match_chooser { + /* Temporary space used for the match-choosing algorithm. The size of + * this array must be at least one more than @nice_len but otherwise is + * arbitrary. More space decreases the frequency at which the algorithm + * is forced to terminate early. 4096 spaces seems sufficient for most + * real data. */ + struct lz_mc_pos_data *optimum; + input_idx_t array_space; + + /* When a match with length greater than or equal to this length is + * found, choose it immediately without further consideration. */ + input_idx_t nice_len; + + /* When matches have been chosen, optimum_cur_idx is set to the position + * in the window of the next match/literal to return and optimum_end_idx + * is set to the position in the window at the end of the last + * match/literal to return. */ + input_idx_t optimum_cur_idx; + input_idx_t optimum_end_idx; +}; + /* * Match chooser position data: * @@ -104,27 +132,6 @@ struct lz_mc_pos_data { LZ_ADAPTIVE_STATE state; }; -struct lz_match_chooser { - /* Temporary space used for the match-choosing algorithm. The size of - * this array must be at least one more than @nice_len but otherwise is - * arbitrary. More space decreases the frequency at which the algorithm - * is forced to terminate early. 4096 spaces seems sufficient for most - * real data. */ - struct lz_mc_pos_data *optimum; - input_idx_t array_space; - - /* When a match with length greater than or equal to this length is - * found, choose it immediately without further consideration. */ - input_idx_t nice_len; - - /* When matches have been chosen, optimum_cur_idx is set to the position - * in the window of the next match/literal to return and optimum_end_idx - * is set to the position in the window at the end of the last - * match/literal to return. */ - input_idx_t optimum_cur_idx; - input_idx_t optimum_end_idx; -}; - /* Initialize the match-chooser. * * After calling this, multiple data buffers can be scanned with it if each is @@ -145,6 +152,16 @@ lz_match_chooser_init(struct lz_match_chooser *mc, return true; } +static inline u64 +lz_match_chooser_get_needed_memory(input_idx_t array_space, + input_idx_t nice_len, + input_idx_t max_match_len) +{ + input_idx_t extra_len = min(nice_len, max_match_len); + return ((u64)(array_space + extra_len) * + sizeof(((struct lz_match_chooser*)0)->optimum[0])); +} + /* Free memory allocated in lz_match_chooser_init(). */ static void lz_match_chooser_destroy(struct lz_match_chooser *mc) @@ -204,7 +221,8 @@ lz_match_chooser_reverse_list(struct lz_match_chooser *mc, input_idx_t cur_pos) * be the number of matches found (which may be 0) and a pointer to the returned * matches must be written into @matches_ret. Matches must be of distinct * lengths and sorted in decreasing order by length. Furthermore, match lengths - * may not exceed the @max_match_len passed to lz_match_chooser_init(). */ + * may not exceed the @max_match_len passed to lz_match_chooser_init(), and all + * match lengths must be at least 2. */ typedef u32 (*lz_get_matches_t)(LZ_COMPRESSOR *ctx, const LZ_ADAPTIVE_STATE *state, struct raw_match **matches_ret); @@ -219,16 +237,16 @@ typedef void (*lz_skip_bytes_t)(LZ_COMPRESSOR *ctx, input_idx_t n); * most recently been searched. This can optionally update the @state to take * into account format-dependent state that affects match costs, such as repeat * offsets. */ -typedef lz_mc_cost_t (lz_get_prev_literal_cost_t)(LZ_COMPRESSOR *ctx, - LZ_ADAPTIVE_STATE *state); +typedef lz_mc_cost_t (*lz_get_prev_literal_cost_t)(LZ_COMPRESSOR *ctx, + LZ_ADAPTIVE_STATE *state); /* Get the cost of a match. This can optionally update the @state to take into * account format-dependent state that affects match costs, such as repeat * offsets. */ -typedef lz_mc_cost_t (lz_get_match_cost_t)(LZ_COMPRESSOR *ctx, - LZ_ADAPTIVE_STATE *state, - input_idx_t length, - input_idx_t offset); +typedef lz_mc_cost_t (*lz_get_match_cost_t)(LZ_COMPRESSOR *ctx, + LZ_ADAPTIVE_STATE *state, + input_idx_t length, + input_idx_t offset); /* * lz_get_near_optimal_match() - @@ -395,31 +413,34 @@ lz_get_near_optimal_match(struct lz_match_chooser *mc, * at that position after the minimum-cost path is taken. The @cur_pos * variable stores the position at which the algorithm is currently * considering coding choices, and the @len_end variable stores the - * greatest offset at which the costs of coding choices have been saved. - * (The algorithm guarantees that all positions before @len_end are - * reachable by at least one path and therefore have costs computed.) + * greatest position at which the costs of coding choices have been + * saved. (Actually, the algorithm guarantees that all positions up to + * and including @len_end are reachable by at least one path.) * * The loop terminates when any one of the following conditions occurs: * - * 1. A match greater than @nice_len is found. When this is found, the - * algorithm chooses this match unconditionally, and consequently the - * near-optimal match/literal sequence up to and including that match - * is fully determined. + * 1. A match with length greater than or equal to @nice_len is found. + * When this occurs, the algorithm chooses this match + * unconditionally, and consequently the near-optimal match/literal + * sequence up to and including that match is fully determined and it + * can begin returning the match/literal list. * * 2. @cur_pos reaches a position not overlapped by a preceding match. * In such cases, the near-optimal match/literal sequence up to - * @cur_pos is fully determined. + * @cur_pos is fully determined and it can begin returning the + * match/literal list. * * 3. Failing either of the above in a degenerate case, the loop * terminates when space in the @mc->optimum array is exhausted. * This terminates the algorithm and forces it to start returning * matches/literals even though they may not be globally optimal. * - * Upon loop termination, a nonempty list of matches/literals has been - * produced and stored in the @optimum array. They are linked in - * reverse order, so the last thing this function does is reverse the - * links and return the first match/literal, leaving the rest to be - * returned immediately by subsequent calls to this function. + * Upon loop termination, a nonempty list of matches/literals will have + * been produced and stored in the @optimum array. These + * matches/literals are linked in reverse order, so the last thing this + * function does is reverse this list and return the first + * match/literal, leaving the rest to be returned immediately by + * subsequent calls to this function. */ input_idx_t cur_pos = 0; input_idx_t len_end = longest_match_len;