From 3c96d0c4ff2175676ec8e0e204032ec0e254353e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 29 Dec 2013 22:50:45 -0600 Subject: [PATCH] Factor out LZ match-choosing code --- Makefile.am | 1 + include/wimlib/lz_optimal.h | 410 ++++++++++++++++++++++++++++++++++++ src/lzms-compress.c | 2 + src/lzx-compress.c | 343 ++++-------------------------- 4 files changed, 451 insertions(+), 305 deletions(-) create mode 100644 include/wimlib/lz_optimal.h diff --git a/Makefile.am b/Makefile.am index 52ba9670..06400403 100644 --- a/Makefile.am +++ b/Makefile.am @@ -93,6 +93,7 @@ libwim_la_SOURCES = \ include/wimlib/lookup_table.h \ include/wimlib/lz.h \ include/wimlib/lz_hash.h \ + include/wimlib/lz_optimal \ include/wimlib/lz_sarray.h \ include/wimlib/lzms.h \ include/wimlib/lzx.h \ diff --git a/include/wimlib/lz_optimal.h b/include/wimlib/lz_optimal.h new file mode 100644 index 00000000..797ba096 --- /dev/null +++ b/include/wimlib/lz_optimal.h @@ -0,0 +1,410 @@ +/* + * lz_optimal.h + * + * Near-optimal LZ (Lempel-Ziv) parsing, or "match choosing". + * + * This is based on the algorithm used in 7-Zip's DEFLATE encoder, written by + * Igor Pavlov. + */ + +/* + * Copyright (C) 2013 Eric Biggers + * + * This file is part of wimlib, a library for working with WIM files. + * + * wimlib is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) + * any later version. + * + * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with wimlib; if not, see http://www.gnu.org/licenses/. + */ + +/* Define the following structures before including this: + * + * LZ_COMPRESSOR + * LZ_FORMAT_STATE */ + +#ifndef _LZ_OPTIMAL_H +#define _LZ_OPTIMAL_H + +#include "wimlib/lz.h" + +typedef input_idx_t lz_mc_cost_t; + +#define LZ_MC_INFINITE_COST (~(lz_mc_cost_t)0) + +/* + * Match chooser position data: + * + * An array of these structures is used during the match-choosing algorithm. + * They correspond to consecutive positions in the window and are used to keep + * track of the cost to reach each position, and the match/literal choices that + * need to be chosen to reach that position. + */ +struct lz_mc_pos_data { + /* The approximate minimum cost, in bits, to reach this position in the + * window which has been found so far. */ + lz_mc_cost_t cost; + + /* The union here is just for clarity, since the fields are used in two + * slightly different ways. Initially, the @prev structure is filled in + * first, and links go from later in the window to earlier in the + * window. Later, @next structure is filled in and links go from + * earlier in the window to later in the window. */ + union { + struct { + /* Position of the start of the match or literal that + * was taken to get to this position in the approximate + * minimum-cost parse. */ + input_idx_t link; + + /* Offset (as in an LZ (length, offset) pair) of the + * match or literal that was taken to get to this + * position in the approximate minimum-cost parse. */ + input_idx_t match_offset; + } prev; + struct { + /* Position at which the match or literal starting at + * this position ends in the minimum-cost parse. */ + input_idx_t link; + + /* Offset (as in an LZ (length, offset) pair) of the + * match or literal starting at this position in the + * approximate minimum-cost parse. */ + input_idx_t match_offset; + } next; + }; + + /* Format-dependent state that exists after an approximate minimum-cost + * path to reach this position is taken. For example, for LZX this is + * the list of recently used match offsets. This could be 0 bytes if + * the format does not have any state that affects match costs. */ + LZ_FORMAT_STATE state; +}; + +struct lz_match_chooser { + /* Temporary space used for the match-choosing algorithm. The size of + * this array must be at least one more than greedy_len but otherwise is + * arbitrary. More space simply allows the match-choosing algorithm to + * potentially find better matches (depending on the input, as always). + */ + struct lz_mc_pos_data *optimum; + input_idx_t array_space; + + /* When a match greater than this length is found, choose it immediately + * without further consideration. */ + input_idx_t greedy_len; + + /* When matches have been chosen, optimum_cur_idx is set to the position + * in the window of the next match/literal to return and optimum_end_idx + * is set to the position in the window at the end of the last + * match/literal to return. */ + input_idx_t optimum_cur_idx; + input_idx_t optimum_end_idx; +}; + +/* Initialize the match-chooser. + * + * After calling this, multiple data buffers can be scanned with it if each is + * preceded with a call to lz_match_chooser_begin(). */ +static bool +lz_match_chooser_init(struct lz_match_chooser *mc, + input_idx_t array_space, + input_idx_t greedy_len, input_idx_t max_match_len) +{ + input_idx_t extra_len = min(greedy_len, max_match_len); + + LZ_ASSERT(array_space > 0); + mc->optimum = MALLOC((array_space + extra_len) * sizeof(mc->optimum[0])); + if (mc->optimum == NULL) + return false; + mc->array_space = array_space; + mc->greedy_len = greedy_len; + return true; +} + +static void +lz_match_chooser_destroy(struct lz_match_chooser *mc) +{ + FREE(mc->optimum); +} + +static void +lz_match_chooser_begin(struct lz_match_chooser *mc) +{ + mc->optimum_cur_idx = 0; + mc->optimum_end_idx = 0; +} + +/* + * Reverse the linked list of near-optimal matches so that they can be returned + * in forwards order. + * + * Returns the first match in the list. + */ +static _always_inline_attribute struct raw_match +lz_match_chooser_reverse_list(struct lz_match_chooser *mc, input_idx_t cur_pos) +{ + unsigned prev_link, saved_prev_link; + unsigned prev_match_offset, saved_prev_match_offset; + + mc->optimum_end_idx = cur_pos; + + saved_prev_link = mc->optimum[cur_pos].prev.link; + saved_prev_match_offset = mc->optimum[cur_pos].prev.match_offset; + + do { + prev_link = saved_prev_link; + prev_match_offset = saved_prev_match_offset; + + saved_prev_link = mc->optimum[prev_link].prev.link; + saved_prev_match_offset = mc->optimum[prev_link].prev.match_offset; + + mc->optimum[prev_link].next.link = cur_pos; + mc->optimum[prev_link].next.match_offset = prev_match_offset; + + cur_pos = prev_link; + } while (cur_pos != 0); + + mc->optimum_cur_idx = mc->optimum[0].next.link; + + return (struct raw_match) + { .len = mc->optimum_cur_idx, + .offset = mc->optimum[0].next.match_offset, + }; +} + +/* Format-specific functions inlined into lz_get_near_optimal_match(). */ + +/* Get the list of possible matches at the next position. The return value must + * be the number of matches found (which may be 0) and a pointer to the returned + * matches must be written into @matches_ret. Matches must be of distinct + * lengths and sorted in decreasing order by length. */ +typedef u32 (*lz_get_matches_t)(LZ_COMPRESSOR *ctx, + const LZ_FORMAT_STATE *state, + struct raw_match **matches_ret); + +/* Skip the specified number of bytes (don't search for matches at them). */ +typedef void (*lz_skip_bytes_t)(LZ_COMPRESSOR *ctx, input_idx_t n); + +/* Get the cost of the literal located at the position at which matches have + * most recently been searched. Note: this currently assumes that literals do + * not affect the LZ_FORMAT_STATE. */ +typedef u32 (lz_get_prev_literal_cost_t)(LZ_COMPRESSOR *ctx); + +/* Get the cost of a match. This can optionally update the @state to take into + * account format-dependent state that affects match costs, such as repeat + * offsets. */ +typedef u32 (lz_get_match_cost_t)(LZ_COMPRESSOR *ctx, LZ_FORMAT_STATE *state, + input_idx_t length, + input_idx_t offset); + +/* + * lz_get_near_optimal_match() - + * + * Choose the optimal match or literal to use at the next position in the input. + * + * Unlike a greedy parser that always takes the longest match, or even a + * parser with one match/literal look-ahead like zlib, the algorithm used here + * may look ahead many matches/literals to determine the optimal match/literal to + * output next. The motivation is that the compression ratio is improved if the + * compressor can do things like use a shorter-than-possible match in order to + * allow a longer match later, and also take into account the Huffman code cost + * model rather than simply assuming that longer is better. + * + * Still, this is not truly an optimal parser because very long matches are + * taken immediately, and the raw match-finder takes some shortcuts. This is + * done to avoid considering many different alternatives that are unlikely to + * be significantly better. + * + * This algorithm is based on that used in 7-Zip's DEFLATE encoder. + * + * Each call to this function does one of two things: + * + * 1. Build a near-optimal sequence of matches/literals, up to some point, that + * will be returned by subsequent calls to this function, then return the + * first one. + * + * OR + * + * 2. Return the next match/literal previously computed by a call to this + * function; + * + * The return value is a (length, offset) pair specifying the match or literal + * chosen. For literals, the length is 0 or 1 and the offset is meaningless. + */ +static _always_inline_attribute struct raw_match +lz_get_near_optimal_match(struct lz_match_chooser *mc, + lz_get_matches_t get_matches, + lz_skip_bytes_t skip_bytes, + lz_get_prev_literal_cost_t get_prev_literal_cost, + lz_get_match_cost_t get_match_cost, + LZ_COMPRESSOR *ctx, + const LZ_FORMAT_STATE *initial_state) +{ + u32 num_possible_matches; + struct raw_match *possible_matches; + struct raw_match match; + input_idx_t longest_match_len; + + if (mc->optimum_cur_idx != mc->optimum_end_idx) { + /* Case 2: Return the next match/literal already found. */ + match.len = mc->optimum[mc->optimum_cur_idx].next.link - + mc->optimum_cur_idx; + match.offset = mc->optimum[mc->optimum_cur_idx].next.match_offset; + + mc->optimum_cur_idx = mc->optimum[mc->optimum_cur_idx].next.link; + return match; + } + + /* Case 1: Compute a new list of matches/literals to return. */ + + mc->optimum_cur_idx = 0; + mc->optimum_end_idx = 0; + + /* Get matches at this position. */ + num_possible_matches = (*get_matches)(ctx, + initial_state, + &possible_matches); + + /* If no matches found, return literal. */ + if (num_possible_matches == 0) + return (struct raw_match){ .len = 0 }; + + /* The matches that were found are sorted in decreasing order by length. + * Get the length of the longest one. */ + longest_match_len = possible_matches[0].len; + + /* Greedy heuristic: if the longest match that was found is greater + * than the number of fast bytes, return it immediately; don't both + * doing more work. */ + if (longest_match_len > mc->greedy_len) { + (*skip_bytes)(ctx, longest_match_len - 1); + return possible_matches[0]; + } + + /* Calculate the cost to reach the next position by outputting a + * literal. */ + mc->optimum[0].state = *initial_state; + mc->optimum[1].state = mc->optimum[0].state; + mc->optimum[1].cost = (*get_prev_literal_cost)(ctx); + mc->optimum[1].prev.link = 0; + + /* Calculate the cost to reach any position up to and including that + * reached by the longest match, using the shortest (i.e. closest) match + * that reaches each position. */ + for (input_idx_t len = 2, match_idx = num_possible_matches - 1; + len <= longest_match_len; len++) + { + + LZ_ASSERT(match_idx < num_possible_matches); + + mc->optimum[len].state = mc->optimum[0].state; + mc->optimum[len].prev.link = 0; + mc->optimum[len].prev.match_offset = possible_matches[match_idx].offset; + mc->optimum[len].cost = (*get_match_cost)(ctx, + &mc->optimum[len].state, + len, + possible_matches[match_idx].offset); + if (len == possible_matches[match_idx].len) + match_idx--; + } + + input_idx_t cur_pos = 0; + + /* len_end: greatest index forward at which costs have been calculated + * so far */ + input_idx_t len_end = longest_match_len; + + for (;;) { + /* Advance to next position. */ + cur_pos++; + + if (cur_pos == len_end || cur_pos == mc->array_space) + return lz_match_chooser_reverse_list(mc, cur_pos); + + /* retrieve the number of matches available at this position */ + num_possible_matches = (*get_matches)(ctx, + &mc->optimum[cur_pos].state, + &possible_matches); + + input_idx_t new_len = 0; + + if (num_possible_matches != 0) { + new_len = possible_matches[0].len; + + /* Greedy heuristic: if we found a match greater than + * the number of fast bytes, stop immediately. */ + if (new_len > mc->greedy_len) { + + /* Build the list of matches to return and get + * the first one. */ + match = lz_match_chooser_reverse_list(mc, cur_pos); + + /* Append the long match to the end of the list. */ + mc->optimum[cur_pos].next.match_offset = + possible_matches[0].offset; + mc->optimum[cur_pos].next.link = cur_pos + new_len; + mc->optimum_end_idx = cur_pos + new_len; + + /* Skip over the remaining bytes of the long match. */ + (*skip_bytes)(ctx, new_len - 1); + + /* Return first match in the list */ + return match; + } + } + + /* Consider proceeding with a literal byte. */ + lz_mc_cost_t cur_cost = mc->optimum[cur_pos].cost; + lz_mc_cost_t cur_plus_literal_cost = cur_cost + + (*get_prev_literal_cost)(ctx); + if (cur_plus_literal_cost < mc->optimum[cur_pos + 1].cost) { + mc->optimum[cur_pos + 1].cost = cur_plus_literal_cost; + mc->optimum[cur_pos + 1].prev.link = cur_pos; + mc->optimum[cur_pos + 1].state = mc->optimum[cur_pos].state; + } + + if (num_possible_matches == 0) + continue; + + /* Consider proceeding with a match. */ + + while (len_end < cur_pos + new_len) + mc->optimum[++len_end].cost = LZ_MC_INFINITE_COST; + + for (input_idx_t len = 2, match_idx = num_possible_matches - 1; + len <= new_len; len++) + { + LZX_ASSERT(match_idx < num_possible_matches); + + LZ_FORMAT_STATE state = mc->optimum[cur_pos].state; + lz_mc_cost_t cost; + + cost = cur_cost + (*get_match_cost)(ctx, + &state, + len, + possible_matches[match_idx].offset); + + if (cost < mc->optimum[cur_pos + len].cost) { + mc->optimum[cur_pos + len].cost = cost; + mc->optimum[cur_pos + len].prev.link = cur_pos; + mc->optimum[cur_pos + len].prev.match_offset = + possible_matches[match_idx].offset; + mc->optimum[cur_pos + len].state = state; + } + + if (len == possible_matches[match_idx].len) + match_idx--; + } + } +} + +#endif /* _LZ_OPTIMAL_H */ diff --git a/src/lzms-compress.c b/src/lzms-compress.c index 5a2e3ed5..9373dc22 100644 --- a/src/lzms-compress.c +++ b/src/lzms-compress.c @@ -47,6 +47,8 @@ #include #include +#define LZMS_OPTIM_ARRAY_SIZE 1024 + /* Stucture used for writing raw bits to the end of the LZMS-compressed data as * a series of 16-bit little endian coding units. */ struct lzms_output_bitstream { diff --git a/src/lzx-compress.c b/src/lzx-compress.c index 056dc83b..25712476 100644 --- a/src/lzx-compress.c +++ b/src/lzx-compress.c @@ -171,7 +171,7 @@ #endif typedef u32 block_cost_t; -#define INFINITE_BLOCK_COST ((block_cost_t)~0U) +#define INFINITE_BLOCK_COST (~(block_cost_t)0) #define LZX_OPTIM_ARRAY_SIZE 4096 @@ -264,50 +264,11 @@ struct lzx_block_spec { struct lzx_codes codes; }; -/* - * An array of these structures is used during the match-choosing algorithm. - * They correspond to consecutive positions in the window and are used to keep - * track of the cost to reach each position, and the match/literal choices that - * need to be chosen to reach that position. - */ -struct lzx_optimal { - /* The approximate minimum cost, in bits, to reach this position in the - * window which has been found so far. */ - block_cost_t cost; - - /* The union here is just for clarity, since the fields are used in two - * slightly different ways. Initially, the @prev structure is filled in - * first, and links go from later in the window to earlier in the - * window. Later, @next structure is filled in and links go from - * earlier in the window to later in the window. */ - union { - struct { - /* Position of the start of the match or literal that - * was taken to get to this position in the approximate - * minimum-cost parse. */ - input_idx_t link; - - /* Offset (as in an LZ (length, offset) pair) of the - * match or literal that was taken to get to this - * position in the approximate minimum-cost parse. */ - input_idx_t match_offset; - } prev; - struct { - /* Position at which the match or literal starting at - * this position ends in the minimum-cost parse. */ - input_idx_t link; - - /* Offset (as in an LZ (length, offset) pair) of the - * match or literal starting at this position in the - * approximate minimum-cost parse. */ - input_idx_t match_offset; - } next; - }; - - /* The match offset LRU queue that will exist when the approximate - * minimum-cost path to reach this position is taken. */ - struct lzx_lru_queue queue; -}; +/* Include template for the match-choosing algorithm. */ +#define LZ_COMPRESSOR struct lzx_compressor +#define LZ_FORMAT_STATE struct lzx_lru_queue +struct lzx_compressor; +#include "wimlib/lz_optimal.h" /* State of the LZX compressor. */ struct lzx_compressor { @@ -387,23 +348,8 @@ struct lzx_compressor { unsigned cached_matches_pos; bool matches_cached; - /* Slow algorithm only: Temporary space used for match-choosing - * algorithm. - * - * The size of this array must be at least LZX_MAX_MATCH_LEN but - * otherwise is arbitrary. More space simply allows the match-choosing - * algorithm to potentially find better matches (depending on the input, - * as always). */ - struct lzx_optimal *optimum; - - /* Slow algorithm only: Variables used by the match-choosing algorithm. - * - * When matches have been chosen, optimum_cur_idx is set to the position - * in the window of the next match/literal to return and optimum_end_idx - * is set to the position in the window at the end of the last - * match/literal to return. */ - u32 optimum_cur_idx; - u32 optimum_end_idx; + /* Match chooser. */ + struct lz_match_chooser mc; }; /* Returns the LZX position slot that corresponds to a given match offset, @@ -1172,7 +1118,7 @@ lzx_set_costs(struct lzx_compressor * ctx, const struct lzx_lens * lens) /* Tell the match-finder to skip the specified number of bytes (@n) in the * input. */ static void -lzx_lz_skip_bytes(struct lzx_compressor *ctx, unsigned n) +lzx_lz_skip_bytes(struct lzx_compressor *ctx, input_idx_t n) { LZX_ASSERT(n <= ctx->match_window_end - ctx->match_window_pos); if (ctx->matches_cached) { @@ -1195,12 +1141,12 @@ lzx_lz_skip_bytes(struct lzx_compressor *ctx, unsigned n) * * The matches are written to ctx->matches in decreasing order of length, and * the return value is the number of matches found. */ -static unsigned +static u32 lzx_lz_get_matches_caching(struct lzx_compressor *ctx, const struct lzx_lru_queue *queue, struct raw_match **matches_ret) { - unsigned num_matches; + u32 num_matches; struct raw_match *matches; LZX_ASSERT(ctx->match_window_pos <= ctx->match_window_end); @@ -1224,7 +1170,7 @@ lzx_lz_get_matches_caching(struct lzx_compressor *ctx, * if it is not the whole window. */ if (ctx->match_window_end < ctx->window_size) { unsigned maxlen = ctx->match_window_end - ctx->match_window_pos; - for (unsigned i = 0; i < num_matches; i++) + for (u32 i = 0; i < num_matches; i++) if (matches[i].len > maxlen) matches[i].len = maxlen; } @@ -1236,7 +1182,7 @@ lzx_lz_get_matches_caching(struct lzx_compressor *ctx, #endif #ifdef ENABLE_LZX_DEBUG - for (unsigned i = 0; i < num_matches; i++) { + for (u32 i = 0; i < num_matches; i++) { LZX_ASSERT(matches[i].len >= LZX_MIN_MATCH_LEN); LZX_ASSERT(matches[i].len <= LZX_MAX_MATCH_LEN); LZX_ASSERT(matches[i].len <= ctx->match_window_end - ctx->match_window_pos); @@ -1252,244 +1198,31 @@ lzx_lz_get_matches_caching(struct lzx_compressor *ctx, return num_matches; } -/* - * Reverse the linked list of near-optimal matches so that they can be returned - * in forwards order. - * - * Returns the first match in the list. - */ -static struct raw_match -lzx_lz_reverse_near_optimal_match_list(struct lzx_compressor *ctx, - unsigned cur_pos) +static u32 +lzx_get_prev_literal_cost(struct lzx_compressor *ctx) { - unsigned prev_link, saved_prev_link; - unsigned prev_match_offset, saved_prev_match_offset; - - ctx->optimum_end_idx = cur_pos; - - saved_prev_link = ctx->optimum[cur_pos].prev.link; - saved_prev_match_offset = ctx->optimum[cur_pos].prev.match_offset; - - do { - prev_link = saved_prev_link; - prev_match_offset = saved_prev_match_offset; - - saved_prev_link = ctx->optimum[prev_link].prev.link; - saved_prev_match_offset = ctx->optimum[prev_link].prev.match_offset; - - ctx->optimum[prev_link].next.link = cur_pos; - ctx->optimum[prev_link].next.match_offset = prev_match_offset; - - cur_pos = prev_link; - } while (cur_pos != 0); - - ctx->optimum_cur_idx = ctx->optimum[0].next.link; + return lzx_literal_cost(ctx->window[ctx->match_window_pos - 1], + &ctx->costs); +} - return (struct raw_match) - { .len = ctx->optimum_cur_idx, - .offset = ctx->optimum[0].next.match_offset, - }; +static u32 +lzx_get_match_cost(struct lzx_compressor *ctx, + struct lzx_lru_queue *queue, + input_idx_t length, input_idx_t offset) +{ + return lzx_match_cost(length, offset, &ctx->costs, queue); } -/* - * lzx_lz_get_near_optimal_match() - - * - * Choose the optimal match or literal to use at the next position in the input. - * - * Unlike a greedy parser that always takes the longest match, or even a - * parser with one match/literal look-ahead like zlib, the algorithm used here - * may look ahead many matches/literals to determine the optimal match/literal to - * output next. The motivation is that the compression ratio is improved if the - * compressor can do things like use a shorter-than-possible match in order to - * allow a longer match later, and also take into account the Huffman code cost - * model rather than simply assuming that longer is better. - * - * Still, this is not truly an optimal parser because very long matches are - * taken immediately, and the raw match-finder takes some shortcuts. This is - * done to avoid considering many different alternatives that are unlikely to - * be significantly better. - * - * This algorithm is based on that used in 7-Zip's DEFLATE encoder. - * - * Each call to this function does one of two things: - * - * 1. Build a near-optimal sequence of matches/literals, up to some point, that - * will be returned by subsequent calls to this function, then return the - * first one. - * - * OR - * - * 2. Return the next match/literal previously computed by a call to this - * function; - * - * This function relies on the following state in the compressor context: - * - * ctx->window (read-only: preprocessed data being compressed) - * ctx->cost (read-only: cost model to use) - * ctx->optimum (internal state; leave uninitialized) - * ctx->optimum_cur_idx (must set to 0 before first call) - * ctx->optimum_end_idx (must set to 0 before first call) - * - * Plus any state used by the raw match-finder. - * - * The return value is a (length, offset) pair specifying the match or literal - * chosen. For literals, the length is less than LZX_MIN_MATCH_LEN and the - * offset is meaningless. - */ static struct raw_match -lzx_lz_get_near_optimal_match(struct lzx_compressor * ctx) +lzx_lz_get_near_optimal_match(struct lzx_compressor *ctx) { - unsigned num_possible_matches; - struct raw_match *possible_matches; - struct raw_match match; - unsigned longest_match_len; - - if (ctx->optimum_cur_idx != ctx->optimum_end_idx) { - /* Case 2: Return the next match/literal already found. */ - match.len = ctx->optimum[ctx->optimum_cur_idx].next.link - - ctx->optimum_cur_idx; - match.offset = ctx->optimum[ctx->optimum_cur_idx].next.match_offset; - - ctx->optimum_cur_idx = ctx->optimum[ctx->optimum_cur_idx].next.link; - return match; - } - - /* Case 1: Compute a new list of matches/literals to return. */ - - ctx->optimum_cur_idx = 0; - ctx->optimum_end_idx = 0; - - /* Get matches at this position. */ - num_possible_matches = lzx_lz_get_matches_caching(ctx, &ctx->queue, &possible_matches); - - /* If no matches found, return literal. */ - if (num_possible_matches == 0) - return (struct raw_match){ .len = 0 }; - - /* The matches that were found are sorted in decreasing order by length. - * Get the length of the longest one. */ - longest_match_len = possible_matches[0].len; - - /* Greedy heuristic: if the longest match that was found is greater - * than the number of fast bytes, return it immediately; don't both - * doing more work. */ - if (longest_match_len > ctx->params.alg_params.slow.num_fast_bytes) { - lzx_lz_skip_bytes(ctx, longest_match_len - 1); - return possible_matches[0]; - } - - /* Calculate the cost to reach the next position by outputting a - * literal. */ - ctx->optimum[0].queue = ctx->queue; - ctx->optimum[1].queue = ctx->optimum[0].queue; - ctx->optimum[1].cost = lzx_literal_cost(ctx->window[ctx->match_window_pos], - &ctx->costs); - ctx->optimum[1].prev.link = 0; - - /* Calculate the cost to reach any position up to and including that - * reached by the longest match, using the shortest (i.e. closest) match - * that reaches each position. */ - BUILD_BUG_ON(LZX_MIN_MATCH_LEN != 2); - for (unsigned len = LZX_MIN_MATCH_LEN, match_idx = num_possible_matches - 1; - len <= longest_match_len; len++) { - - LZX_ASSERT(match_idx < num_possible_matches); - - ctx->optimum[len].queue = ctx->optimum[0].queue; - ctx->optimum[len].prev.link = 0; - ctx->optimum[len].prev.match_offset = possible_matches[match_idx].offset; - ctx->optimum[len].cost = lzx_match_cost(len, - possible_matches[match_idx].offset, - &ctx->costs, - &ctx->optimum[len].queue); - if (len == possible_matches[match_idx].len) - match_idx--; - } - - unsigned cur_pos = 0; - - /* len_end: greatest index forward at which costs have been calculated - * so far */ - unsigned len_end = longest_match_len; - - for (;;) { - /* Advance to next position. */ - cur_pos++; - - if (cur_pos == len_end || cur_pos == LZX_OPTIM_ARRAY_SIZE) - return lzx_lz_reverse_near_optimal_match_list(ctx, cur_pos); - - /* retrieve the number of matches available at this position */ - num_possible_matches = lzx_lz_get_matches_caching(ctx, &ctx->optimum[cur_pos].queue, - &possible_matches); - - unsigned new_len = 0; - - if (num_possible_matches != 0) { - new_len = possible_matches[0].len; - - /* Greedy heuristic: if we found a match greater than - * the number of fast bytes, stop immediately. */ - if (new_len > ctx->params.alg_params.slow.num_fast_bytes) { - - /* Build the list of matches to return and get - * the first one. */ - match = lzx_lz_reverse_near_optimal_match_list(ctx, cur_pos); - - /* Append the long match to the end of the list. */ - ctx->optimum[cur_pos].next.match_offset = - possible_matches[0].offset; - ctx->optimum[cur_pos].next.link = cur_pos + new_len; - ctx->optimum_end_idx = cur_pos + new_len; - - /* Skip over the remaining bytes of the long match. */ - lzx_lz_skip_bytes(ctx, new_len - 1); - - /* Return first match in the list */ - return match; - } - } - - /* Consider proceeding with a literal byte. */ - block_cost_t cur_cost = ctx->optimum[cur_pos].cost; - block_cost_t cur_plus_literal_cost = cur_cost + - lzx_literal_cost(ctx->window[ctx->match_window_pos - 1], - &ctx->costs); - if (cur_plus_literal_cost < ctx->optimum[cur_pos + 1].cost) { - ctx->optimum[cur_pos + 1].cost = cur_plus_literal_cost; - ctx->optimum[cur_pos + 1].prev.link = cur_pos; - ctx->optimum[cur_pos + 1].queue = ctx->optimum[cur_pos].queue; - } - - if (num_possible_matches == 0) - continue; - - /* Consider proceeding with a match. */ - - while (len_end < cur_pos + new_len) - ctx->optimum[++len_end].cost = INFINITE_BLOCK_COST; - - for (unsigned len = LZX_MIN_MATCH_LEN, match_idx = num_possible_matches - 1; - len <= new_len; len++) { - LZX_ASSERT(match_idx < num_possible_matches); - struct lzx_lru_queue q = ctx->optimum[cur_pos].queue; - block_cost_t cost = cur_cost + lzx_match_cost(len, - possible_matches[match_idx].offset, - &ctx->costs, - &q); - - if (cost < ctx->optimum[cur_pos + len].cost) { - ctx->optimum[cur_pos + len].cost = cost; - ctx->optimum[cur_pos + len].prev.link = cur_pos; - ctx->optimum[cur_pos + len].prev.match_offset = - possible_matches[match_idx].offset; - ctx->optimum[cur_pos + len].queue = q; - } - - if (len == possible_matches[match_idx].len) - match_idx--; - } - } + return lz_get_near_optimal_match(&ctx->mc, + lzx_lz_get_matches_caching, + lzx_lz_skip_bytes, + lzx_get_prev_literal_cost, + lzx_get_match_cost, + ctx, + &ctx->queue); } /* @@ -1605,8 +1338,7 @@ static void lzx_optimize_blocks(struct lzx_compressor *ctx) { lzx_lru_queue_init(&ctx->queue); - ctx->optimum_cur_idx = 0; - ctx->optimum_end_idx = 0; + lz_match_chooser_begin(&ctx->mc); const unsigned num_passes = ctx->params.alg_params.slow.num_optim_passes; @@ -1899,7 +1631,7 @@ lzx_free_compressor(void *_ctx) if (ctx) { FREE(ctx->chosen_matches); FREE(ctx->cached_matches); - FREE(ctx->optimum); + lz_match_chooser_destroy(&ctx->mc); lz_sarray_destroy(&ctx->lz_sarray); FREE(ctx->block_specs); FREE(ctx->prev_tab); @@ -2006,9 +1738,10 @@ lzx_create_compressor(size_t window_size, } if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) { - ctx->optimum = MALLOC((LZX_OPTIM_ARRAY_SIZE + LZX_MAX_MATCH_LEN) * - sizeof(ctx->optimum[0])); - if (ctx->optimum == NULL) + if (!lz_match_chooser_init(&ctx->mc, + LZX_OPTIM_ARRAY_SIZE, + params->alg_params.slow.num_fast_bytes, + LZX_MAX_MATCH_LEN)) goto oom; } -- 2.43.0