- * reached by the longest match. */
- matchptr = matches;
- for (unsigned len = 2; len <= longest_len; len++) {
- u32 offset = matchptr->offset;
-
- ctx->optimum[len].queue = ctx->queue;
- ctx->optimum[len].prev.link = 0;
- ctx->optimum[len].prev.match_offset = offset;
- ctx->optimum[len].cost = lzx_match_cost(len, offset, &ctx->costs,
- &ctx->optimum[len].queue);
- if (len == matchptr->len)
- matchptr++;
+ * reached by the longest match.
+ *
+ * Note: We consider only the lowest-offset match that reaches each
+ * position.
+ *
+ * Note: Some of the cost calculation stays the same for each offset,
+ * regardless of how many lengths it gets used for. Therefore, to
+ * improve performance, we hand-code the cost calculation instead of
+ * calling lzx_match_cost() to do a from-scratch cost evaluation at each
+ * length. */
+ for (unsigned i = 0, len = 2; i < num_matches; i++) {
+ u32 offset;
+ struct lzx_lru_queue queue;
+ u32 position_cost;
+ unsigned position_slot;
+ unsigned num_extra_bits;
+
+ offset = matches[i].offset;
+ queue = ctx->queue;
+ position_cost = 0;
+
+ position_slot = lzx_get_position_slot(offset, &queue);
+ num_extra_bits = lzx_get_num_extra_bits(position_slot);
+ if (num_extra_bits >= 3) {
+ position_cost += num_extra_bits - 3;
+ position_cost += ctx->costs.aligned[(offset + LZX_OFFSET_OFFSET) & 7];
+ } else {
+ position_cost += num_extra_bits;
+ }
+
+ do {
+ unsigned len_header;
+ unsigned main_symbol;
+ u32 cost;
+
+ cost = position_cost;
+
+ len_header = min(len - LZX_MIN_MATCH_LEN, LZX_NUM_PRIMARY_LENS);
+ main_symbol = ((position_slot << 3) | len_header) + LZX_NUM_CHARS;
+ cost += ctx->costs.main[main_symbol];
+ if (len_header == LZX_NUM_PRIMARY_LENS)
+ cost += ctx->costs.len[len - LZX_MIN_MATCH_LEN - LZX_NUM_PRIMARY_LENS];
+
+ ctx->optimum[len].queue = queue;
+ ctx->optimum[len].prev.link = 0;
+ ctx->optimum[len].prev.match_offset = offset;
+ ctx->optimum[len].cost = cost;
+ } while (++len <= matches[i].len);