+ struct lz_match match;
+ unsigned cur_pos;
+ unsigned end_pos;
+ struct xpress_mc_pos_data * const optimum = c->optimum;
+
+ if (c->optimum_cur_idx != c->optimum_end_idx) {
+ /* Return previously computed match or literal. */
+ match.len = optimum[c->optimum_cur_idx].next.link -
+ c->optimum_cur_idx;
+ match.offset = optimum[c->optimum_cur_idx].next.match_offset;
+
+ c->optimum_cur_idx = optimum[c->optimum_cur_idx].next.link;
+ return match;
+ }
+
+ c->optimum_cur_idx = 0;
+ c->optimum_end_idx = 0;
+
+ num_matches = xpress_get_matches(c, &matches);
+
+ if (num_matches == 0)
+ return (struct lz_match) {};
+
+ if (matches[num_matches - 1].len >= c->params.nice_match_length) {
+ /* Take the long match immediately. */
+ xpress_skip_bytes(c, matches[num_matches - 1].len - 1);
+ return matches[num_matches - 1];
+ }
+
+ /* Consider coding a literal. */
+ optimum[1].cost = xpress_prev_literal_cost(c);
+ optimum[1].prev.link = 0;
+
+ optimum[2].cost = MC_INFINITE_COST;
+
+ {
+ /* Consider coding a match. Cost evaluation is hand-inlined so
+ * that we can do some performance hacks. */
+
+ unsigned i = 0;
+ unsigned len = 3;
+ struct xpress_mc_pos_data *optimum_ptr = &optimum[len];
+
+ if (matches[num_matches - 1].len < 0xf + XPRESS_MIN_MATCH_LEN) {
+ do {
+ u32 offset = matches[i].offset;
+ u32 offset_bsr = bsr32(offset);
+ unsigned len_hdr = len - XPRESS_MIN_MATCH_LEN;
+ unsigned sym = XPRESS_NUM_CHARS +
+ ((offset_bsr << 4) | len_hdr);
+ do {
+ optimum_ptr->prev.link = 0;
+ optimum_ptr->prev.match_offset = offset;
+ optimum_ptr->cost = offset_bsr + c->costs[sym];
+ sym++;
+ optimum_ptr++;
+ } while (++len <= matches[i].len);
+ } while (++i != num_matches);
+ } else {
+ do {
+ u32 offset = matches[i].offset;
+ u32 offset_bsr = bsr32(offset);
+ do {
+ u32 adjusted_len = len - XPRESS_MIN_MATCH_LEN;
+ unsigned len_hdr = min(adjusted_len, 0xf);
+ unsigned sym = XPRESS_NUM_CHARS +
+ ((offset_bsr << 4) | len_hdr);
+ u32 cost = offset_bsr + c->costs[sym];
+ if (adjusted_len >= 0xf) {
+ cost += 8;
+ if (adjusted_len - 0xf >= 0xff)
+ cost += 16;
+ }
+
+ optimum_ptr->prev.link = 0;
+ optimum_ptr->prev.match_offset = offset;
+ optimum_ptr->cost = cost;
+ optimum_ptr++;
+ } while (++len <= matches[i].len);
+ } while (++i != num_matches);
+ }
+ }
+
+ end_pos = matches[num_matches - 1].len;
+ cur_pos = 1;
+ do {
+ u32 cost;
+ u32 longest_len;
+
+ num_matches = xpress_get_matches(c, &matches);
+
+ if (num_matches) {
+ longest_len = matches[num_matches - 1].len;
+ if (longest_len >= c->params.nice_match_length) {
+ /* Take the long match immediately. */
+ match = xpress_match_chooser_reverse_list(c, cur_pos);
+
+ optimum[cur_pos].next.match_offset =
+ matches[num_matches - 1].offset;
+ optimum[cur_pos].next.link = cur_pos + longest_len;
+ c->optimum_end_idx = cur_pos + longest_len;
+
+ xpress_skip_bytes(c, longest_len - 1);
+
+ return match;
+ }
+ } else {
+ longest_len = 1;
+ }
+
+ while (end_pos < cur_pos + longest_len)
+ optimum[++end_pos].cost = MC_INFINITE_COST;
+
+ /* Consider coding a literal. */
+ cost = optimum[cur_pos].cost + xpress_prev_literal_cost(c);
+ if (cost < optimum[cur_pos + 1].cost) {
+ optimum[cur_pos + 1].cost = cost;
+ optimum[cur_pos + 1].prev.link = cur_pos;
+ }
+
+ if (num_matches) {
+ /* Consider coding a match. Cost evaluation is
+ * hand-inlined so that we can do some performance
+ * hacks. */
+ unsigned i = 0;
+ unsigned len = 3;
+ struct xpress_mc_pos_data *optimum_ptr = &optimum[cur_pos + 3];
+ u32 cur_cost = optimum[cur_pos].cost;
+
+ if (matches[num_matches - 1].len < 0xf + XPRESS_MIN_MATCH_LEN) {
+ do {
+ u32 offset = matches[i].offset;
+ u32 offset_bsr = bsr32(offset);
+ unsigned len_hdr = len - XPRESS_MIN_MATCH_LEN;
+ unsigned sym = XPRESS_NUM_CHARS +
+ ((offset_bsr << 4) | len_hdr);
+
+ u32 base_cost = cur_cost + offset_bsr;
+ do {
+ cost = base_cost + c->costs[sym];
+ if (cost < optimum_ptr->cost) {
+ optimum_ptr->prev.link = cur_pos;
+ optimum_ptr->prev.match_offset = offset;
+ optimum_ptr->cost = cost;
+ }
+ sym++;
+ optimum_ptr++;
+ } while (++len <= matches[i].len);
+ } while (++i != num_matches);
+ } else {
+ do {
+ u32 offset = matches[i].offset;
+ u32 offset_bsr = bsr32(offset);
+
+ u32 base_cost = cur_cost + offset_bsr;
+ do {
+ u32 adjusted_len = len - XPRESS_MIN_MATCH_LEN;
+ unsigned len_hdr = min(adjusted_len, 0xf);
+ unsigned sym = XPRESS_NUM_CHARS +
+ ((offset_bsr << 4) | len_hdr);
+
+ cost = base_cost + c->costs[sym];
+ if (adjusted_len >= 0xf) {
+ cost += 8;
+ if (adjusted_len - 0xf >= 0xff)
+ cost += 16;
+ }
+
+ if (cost < optimum_ptr->cost) {
+ optimum_ptr->prev.link = cur_pos;
+ optimum_ptr->prev.match_offset = offset;
+ optimum_ptr->cost = cost;
+ }
+ optimum_ptr++;
+ } while (++len <= matches[i].len);
+ } while (++i != num_matches);
+ }
+ }
+
+ cur_pos++;
+
+ } while (cur_pos != end_pos && cur_pos != XPRESS_OPTIM_ARRAY_LENGTH);
+
+ return xpress_match_chooser_reverse_list(c, cur_pos);
+}
+
+/* Lazy parsing. */
+static struct lz_match
+xpress_choose_lazy_item(struct xpress_compressor *c)
+{
+ const struct lz_match *matches;
+ struct lz_match cur_match;
+ struct lz_match next_match;
+ u32 num_matches;
+
+ if (c->prev_match.len) {
+ cur_match = c->prev_match;
+ c->prev_match.len = 0;
+ } else {
+ num_matches = xpress_get_matches(c, &matches);
+ if (num_matches == 0 ||
+ (matches[num_matches - 1].len == 3 &&
+ matches[num_matches - 1].offset >= c->len_3_too_far))
+ {
+ cur_match.len = 0;
+ return cur_match;
+ }
+
+ /* With lazy parsing we only consider the longest match at each
+ * position. */
+ cur_match = matches[num_matches - 1];
+ }
+
+ if (cur_match.len >= c->params.nice_match_length) {
+ xpress_skip_bytes(c, cur_match.len - 1);
+ return cur_match;
+ }
+
+ num_matches = xpress_get_matches(c, &matches);
+ if (num_matches == 0 ||
+ (matches[num_matches - 1].len == 3 &&
+ matches[num_matches - 1].offset >= c->len_3_too_far))
+ {
+ xpress_skip_bytes(c, cur_match.len - 2);
+ return cur_match;
+ }
+
+ next_match = matches[num_matches - 1];
+
+ if (next_match.len <= cur_match.len) {
+ xpress_skip_bytes(c, cur_match.len - 2);
+ return cur_match;
+ } else {
+ /* Longer match at next position. Choose a literal here so we
+ * will get to use the longer match. */
+ c->prev_match = next_match;
+ cur_match.len = 0;
+ return cur_match;
+ }
+}
+
+/* Greedy parsing. */
+static struct lz_match
+xpress_choose_greedy_item(struct xpress_compressor *c)
+{
+ const struct lz_match *matches;
+ u32 num_matches;
+
+ num_matches = xpress_get_matches(c, &matches);
+ if (num_matches == 0 ||
+ (matches[num_matches - 1].len == 3 &&
+ matches[num_matches - 1].offset >= c->len_3_too_far))
+ return (struct lz_match) {};
+
+ xpress_skip_bytes(c, matches[num_matches - 1].len - 1);
+ return matches[num_matches - 1];
+}
+
+/* Always choose a literal. */
+static struct lz_match
+xpress_choose_literal(struct xpress_compressor *c)
+{
+ return (struct lz_match) {};
+}
+
+/*
+ * Return the next match or literal to use, delegating to the currently selected
+ * match-choosing algorithm.
+ *
+ * If the length of the returned 'struct lz_match' is less than
+ * XPRESS_MIN_MATCH_LEN, then it is really a literal.
+ */
+static inline struct lz_match
+xpress_choose_item(struct xpress_compressor *c)
+{
+ return (*c->params.choose_item_func)(c);
+}
+
+/* Set default XPRESS Huffman symbol costs to kick-start the iterative
+ * optimization algorithm. */
+static void
+xpress_set_default_costs(u8 costs[])
+{