+ const struct lz_match *matches;
+ struct lz_match match;
+ unsigned longest_len;
+ unsigned longest_rep_len;
+ u32 longest_rep_offset;
+ unsigned cur_pos;
+ unsigned end_pos;
+
+ if (ctx->optimum_cur_idx != ctx->optimum_end_idx) {
+ /* Case 2: Return the next match/literal already found. */
+ match.len = ctx->optimum[ctx->optimum_cur_idx].next.link -
+ ctx->optimum_cur_idx;
+ match.offset = ctx->optimum[ctx->optimum_cur_idx].next.match_offset;
+
+ ctx->optimum_cur_idx = ctx->optimum[ctx->optimum_cur_idx].next.link;
+ return match;
+ }
+
+ /* Case 1: Compute a new list of matches/literals to return. */
+
+ ctx->optimum_cur_idx = 0;
+ ctx->optimum_end_idx = 0;
+
+ /* Search for matches at recent offsets. Only keep the one with the
+ * longest match length. */
+ longest_rep_len = LZX_MIN_MATCH_LEN - 1;
+ if (ctx->match_window_pos >= 1) {
+ unsigned limit = min(LZX_MAX_MATCH_LEN,
+ ctx->match_window_end - ctx->match_window_pos);
+ for (int i = 0; i < LZX_NUM_RECENT_OFFSETS; i++) {
+ u32 offset = ctx->queue.R[i];
+ const u8 *strptr = &ctx->window[ctx->match_window_pos];
+ const u8 *matchptr = strptr - offset;
+ unsigned len = 0;
+ while (len < limit && strptr[len] == matchptr[len])
+ len++;
+ if (len > longest_rep_len) {
+ longest_rep_len = len;
+ longest_rep_offset = offset;
+ }
+ }
+ }
+
+ /* If there's a long match with a recent offset, take it. */
+ if (longest_rep_len >= ctx->params.alg_params.slow.nice_match_length) {
+ lzx_skip_bytes(ctx, longest_rep_len);
+ return (struct lz_match) {
+ .len = longest_rep_len,
+ .offset = longest_rep_offset,
+ };
+ }
+
+ /* Search other matches. */
+ num_matches = lzx_get_matches(ctx, &matches);
+
+ /* If there's a long match, take it. */
+ if (num_matches) {
+ longest_len = matches[num_matches - 1].len;
+ if (longest_len >= ctx->params.alg_params.slow.nice_match_length) {
+ lzx_skip_bytes(ctx, longest_len - 1);
+ return matches[num_matches - 1];
+ }
+ } else {
+ longest_len = 1;
+ }
+
+ /* Calculate the cost to reach the next position by coding a literal.
+ */
+ ctx->optimum[1].queue = ctx->queue;
+ ctx->optimum[1].cost = lzx_literal_cost(ctx->window[ctx->match_window_pos - 1],
+ &ctx->costs);
+ ctx->optimum[1].prev.link = 0;
+
+ /* Calculate the cost to reach any position up to and including that
+ * reached by the longest match.
+ *
+ * Note: We consider only the lowest-offset match that reaches each
+ * position.
+ *
+ * Note: Some of the cost calculation stays the same for each offset,
+ * regardless of how many lengths it gets used for. Therefore, to
+ * improve performance, we hand-code the cost calculation instead of
+ * calling lzx_match_cost() to do a from-scratch cost evaluation at each
+ * length. */
+ for (unsigned i = 0, len = 2; i < num_matches; i++) {
+ u32 offset;
+ struct lzx_lru_queue queue;
+ u32 position_cost;
+ unsigned position_slot;
+ unsigned num_extra_bits;
+
+ offset = matches[i].offset;
+ queue = ctx->queue;
+ position_cost = 0;
+
+ position_slot = lzx_get_position_slot(offset, &queue);
+ num_extra_bits = lzx_get_num_extra_bits(position_slot);
+ if (num_extra_bits >= 3) {
+ position_cost += num_extra_bits - 3;
+ position_cost += ctx->costs.aligned[(offset + LZX_OFFSET_OFFSET) & 7];
+ } else {
+ position_cost += num_extra_bits;
+ }
+
+ do {
+ unsigned len_header;
+ unsigned main_symbol;
+ u32 cost;
+
+ cost = position_cost;
+
+ len_header = min(len - LZX_MIN_MATCH_LEN, LZX_NUM_PRIMARY_LENS);
+ main_symbol = ((position_slot << 3) | len_header) + LZX_NUM_CHARS;
+ cost += ctx->costs.main[main_symbol];
+ if (len_header == LZX_NUM_PRIMARY_LENS)
+ cost += ctx->costs.len[len - LZX_MIN_MATCH_LEN - LZX_NUM_PRIMARY_LENS];
+
+ ctx->optimum[len].queue = queue;
+ ctx->optimum[len].prev.link = 0;
+ ctx->optimum[len].prev.match_offset = offset;
+ ctx->optimum[len].cost = cost;
+ } while (++len <= matches[i].len);
+ }
+ end_pos = longest_len;
+
+ if (longest_rep_len >= LZX_MIN_MATCH_LEN) {
+ struct lzx_lru_queue queue;
+ u32 cost;
+
+ while (end_pos < longest_rep_len)
+ ctx->optimum[++end_pos].cost = MC_INFINITE_COST;
+
+ queue = ctx->queue;
+ cost = lzx_match_cost(longest_rep_len, longest_rep_offset,
+ &ctx->costs, &queue);
+ if (cost <= ctx->optimum[longest_rep_len].cost) {
+ ctx->optimum[longest_rep_len].queue = queue;
+ ctx->optimum[longest_rep_len].prev.link = 0;
+ ctx->optimum[longest_rep_len].prev.match_offset = longest_rep_offset;
+ ctx->optimum[longest_rep_len].cost = cost;
+ }
+ }
+
+ /* Step forward, calculating the estimated minimum cost to reach each
+ * position. The algorithm may find multiple paths to reach each
+ * position; only the lowest-cost path is saved.
+ *
+ * The progress of the parse is tracked in the @ctx->optimum array, which
+ * for each position contains the minimum cost to reach that position,
+ * the index of the start of the match/literal taken to reach that
+ * position through the minimum-cost path, the offset of the match taken
+ * (not relevant for literals), and the adaptive state that will exist
+ * at that position after the minimum-cost path is taken. The @cur_pos
+ * variable stores the position at which the algorithm is currently
+ * considering coding choices, and the @end_pos variable stores the
+ * greatest position at which the costs of coding choices have been
+ * saved. (Actually, the algorithm guarantees that all positions up to
+ * and including @end_pos are reachable by at least one path.)
+ *
+ * The loop terminates when any one of the following conditions occurs:
+ *
+ * 1. A match with length greater than or equal to @nice_match_length is
+ * found. When this occurs, the algorithm chooses this match
+ * unconditionally, and consequently the near-optimal match/literal
+ * sequence up to and including that match is fully determined and it
+ * can begin returning the match/literal list.
+ *
+ * 2. @cur_pos reaches a position not overlapped by a preceding match.
+ * In such cases, the near-optimal match/literal sequence up to
+ * @cur_pos is fully determined and it can begin returning the
+ * match/literal list.
+ *
+ * 3. Failing either of the above in a degenerate case, the loop
+ * terminates when space in the @ctx->optimum array is exhausted.
+ * This terminates the algorithm and forces it to start returning
+ * matches/literals even though they may not be globally optimal.
+ *
+ * Upon loop termination, a nonempty list of matches/literals will have
+ * been produced and stored in the @optimum array. These
+ * matches/literals are linked in reverse order, so the last thing this
+ * function does is reverse this list and return the first
+ * match/literal, leaving the rest to be returned immediately by
+ * subsequent calls to this function.
+ */
+ cur_pos = 0;
+ for (;;) {
+ u32 cost;
+
+ /* Advance to next position. */
+ cur_pos++;
+
+ /* Check termination conditions (2) and (3) noted above. */
+ if (cur_pos == end_pos || cur_pos == LZX_OPTIM_ARRAY_SIZE)
+ return lzx_match_chooser_reverse_list(ctx, cur_pos);
+
+ /* Search for matches at recent offsets. */
+ longest_rep_len = LZX_MIN_MATCH_LEN - 1;
+ unsigned limit = min(LZX_MAX_MATCH_LEN,
+ ctx->match_window_end - ctx->match_window_pos);
+ for (int i = 0; i < LZX_NUM_RECENT_OFFSETS; i++) {
+ u32 offset = ctx->optimum[cur_pos].queue.R[i];
+ const u8 *strptr = &ctx->window[ctx->match_window_pos];
+ const u8 *matchptr = strptr - offset;
+ unsigned len = 0;
+ while (len < limit && strptr[len] == matchptr[len])
+ len++;
+ if (len > longest_rep_len) {
+ longest_rep_len = len;
+ longest_rep_offset = offset;
+ }
+ }
+
+ /* If we found a long match at a recent offset, choose it
+ * immediately. */
+ if (longest_rep_len >= ctx->params.alg_params.slow.nice_match_length) {
+ /* Build the list of matches to return and get
+ * the first one. */
+ match = lzx_match_chooser_reverse_list(ctx, cur_pos);
+
+ /* Append the long match to the end of the list. */
+ ctx->optimum[cur_pos].next.match_offset = longest_rep_offset;
+ ctx->optimum[cur_pos].next.link = cur_pos + longest_rep_len;
+ ctx->optimum_end_idx = cur_pos + longest_rep_len;
+
+ /* Skip over the remaining bytes of the long match. */
+ lzx_skip_bytes(ctx, longest_rep_len);
+
+ /* Return first match in the list. */
+ return match;
+ }
+
+ /* Search other matches. */
+ num_matches = lzx_get_matches(ctx, &matches);
+
+ /* If there's a long match, take it. */
+ if (num_matches) {
+ longest_len = matches[num_matches - 1].len;
+ if (longest_len >= ctx->params.alg_params.slow.nice_match_length) {
+ /* Build the list of matches to return and get
+ * the first one. */
+ match = lzx_match_chooser_reverse_list(ctx, cur_pos);
+
+ /* Append the long match to the end of the list. */
+ ctx->optimum[cur_pos].next.match_offset =
+ matches[num_matches - 1].offset;
+ ctx->optimum[cur_pos].next.link = cur_pos + longest_len;
+ ctx->optimum_end_idx = cur_pos + longest_len;
+
+ /* Skip over the remaining bytes of the long match. */
+ lzx_skip_bytes(ctx, longest_len - 1);
+
+ /* Return first match in the list. */
+ return match;
+ }
+ } else {
+ longest_len = 1;
+ }
+
+ while (end_pos < cur_pos + longest_len)
+ ctx->optimum[++end_pos].cost = MC_INFINITE_COST;
+
+ /* Consider coding a literal. */
+ cost = ctx->optimum[cur_pos].cost +
+ lzx_literal_cost(ctx->window[ctx->match_window_pos - 1],
+ &ctx->costs);
+ if (cost < ctx->optimum[cur_pos + 1].cost) {
+ ctx->optimum[cur_pos + 1].queue = ctx->optimum[cur_pos].queue;
+ ctx->optimum[cur_pos + 1].cost = cost;
+ ctx->optimum[cur_pos + 1].prev.link = cur_pos;
+ }
+
+ /* Consider coding a match.
+ *
+ * The hard-coded cost calculation is done for the same reason
+ * stated in the comment for the similar loop earlier.
+ * Actually, it is *this* one that has the biggest effect on
+ * performance; overall LZX compression is > 10% faster with
+ * this code compared to calling lzx_match_cost() with each
+ * length. */
+ for (unsigned i = 0, len = 2; i < num_matches; i++) {
+ u32 offset;
+ struct lzx_lru_queue queue;
+ u32 position_cost;
+ unsigned position_slot;
+ unsigned num_extra_bits;
+
+ offset = matches[i].offset;
+ queue = ctx->optimum[cur_pos].queue;
+ position_cost = ctx->optimum[cur_pos].cost;
+
+ position_slot = lzx_get_position_slot(offset, &queue);
+ num_extra_bits = lzx_get_num_extra_bits(position_slot);
+ if (num_extra_bits >= 3) {
+ position_cost += num_extra_bits - 3;
+ position_cost += ctx->costs.aligned[
+ (offset + LZX_OFFSET_OFFSET) & 7];
+ } else {
+ position_cost += num_extra_bits;
+ }
+
+ do {
+ unsigned len_header;
+ unsigned main_symbol;
+ u32 cost;
+
+ cost = position_cost;
+
+ len_header = min(len - LZX_MIN_MATCH_LEN,
+ LZX_NUM_PRIMARY_LENS);
+ main_symbol = ((position_slot << 3) | len_header) +
+ LZX_NUM_CHARS;
+ cost += ctx->costs.main[main_symbol];
+ if (len_header == LZX_NUM_PRIMARY_LENS) {
+ cost += ctx->costs.len[len -
+ LZX_MIN_MATCH_LEN -
+ LZX_NUM_PRIMARY_LENS];
+ }
+ if (cost < ctx->optimum[cur_pos + len].cost) {
+ ctx->optimum[cur_pos + len].queue = queue;
+ ctx->optimum[cur_pos + len].prev.link = cur_pos;
+ ctx->optimum[cur_pos + len].prev.match_offset = offset;
+ ctx->optimum[cur_pos + len].cost = cost;
+ }
+ } while (++len <= matches[i].len);
+ }
+
+ if (longest_rep_len >= LZX_MIN_MATCH_LEN) {
+ struct lzx_lru_queue queue;
+
+ while (end_pos < cur_pos + longest_rep_len)
+ ctx->optimum[++end_pos].cost = MC_INFINITE_COST;
+
+ queue = ctx->optimum[cur_pos].queue;
+
+ cost = ctx->optimum[cur_pos].cost +
+ lzx_match_cost(longest_rep_len, longest_rep_offset,
+ &ctx->costs, &queue);
+ if (cost <= ctx->optimum[cur_pos + longest_rep_len].cost) {
+ ctx->optimum[cur_pos + longest_rep_len].queue =
+ queue;
+ ctx->optimum[cur_pos + longest_rep_len].prev.link =
+ cur_pos;
+ ctx->optimum[cur_pos + longest_rep_len].prev.match_offset =
+ longest_rep_offset;
+ ctx->optimum[cur_pos + longest_rep_len].cost =
+ cost;
+ }
+ }
+ }
+}
+
+/* Set default symbol costs for the LZX Huffman codes. */
+static void
+lzx_set_default_costs(struct lzx_costs * costs, unsigned num_main_syms)
+{