+ return (*c->get_matches_func)(c, matches_ret);
+}
+
+static void
+xpress_skip_bytes_fillcache(struct xpress_compressor *c, u32 n)
+{
+ struct lz_match *cache_ptr;
+
+ c->cur_window_ptr += n;
+ cache_ptr = c->cache_ptr;
+ lz_mf_skip_positions(c->mf, n);
+ if (likely(cache_ptr <= c->cache_limit)) {
+ do {
+ cache_ptr->len = 0;
+ cache_ptr += 1;
+ } while (--n && likely(cache_ptr <= c->cache_limit));
+ }
+ c->cache_ptr = cache_ptr;
+}
+
+static void
+xpress_skip_bytes_usecache(struct xpress_compressor *c, u32 n)
+{
+ struct lz_match *cache_ptr;
+
+ c->cur_window_ptr += n;
+ cache_ptr = c->cache_ptr;
+ if (likely(cache_ptr <= c->cache_limit)) {
+ do {
+ cache_ptr += 1 + cache_ptr->len;
+ } while (--n && likely(cache_ptr <= c->cache_limit));
+ }
+ c->cache_ptr = cache_ptr;
+}
+
+static void
+xpress_skip_bytes_usecache_nocheck(struct xpress_compressor *c, u32 n)
+{
+ struct lz_match *cache_ptr;
+
+ c->cur_window_ptr += n;
+ cache_ptr = c->cache_ptr;
+ do {
+ cache_ptr += 1 + cache_ptr->len;
+ } while (--n);
+ c->cache_ptr = cache_ptr;
+}
+
+static void
+xpress_skip_bytes_noncaching(struct xpress_compressor *c, u32 n)
+{
+ c->cur_window_ptr += n;
+ lz_mf_skip_positions(c->mf, n);
+}
+
+/*
+ * Skip the specified number of positions in the window (don't search for
+ * matches at them).
+ */
+static inline void
+xpress_skip_bytes(struct xpress_compressor *c, u32 n)
+{
+ return (*c->skip_bytes_func)(c, n);
+}
+
+/*
+ * Returns the cost, in bits, required to output the literal from the previous
+ * window position (the position at which matches were last searched).
+ */
+static inline u32
+xpress_prev_literal_cost(const struct xpress_compressor *c)
+{
+ return c->costs[*(c->cur_window_ptr - 1)];
+}
+
+/*
+ * Reverse the linked list of near-optimal matches so that they can be returned
+ * in forwards order.
+ *
+ * Returns the first match in the list.
+ */
+static struct lz_match
+xpress_match_chooser_reverse_list(struct xpress_compressor *c, unsigned cur_pos)
+{
+ unsigned prev_link, saved_prev_link;
+ u32 prev_match_offset, saved_prev_match_offset;
+
+ c->optimum_end_idx = cur_pos;
+
+ saved_prev_link = c->optimum[cur_pos].prev.link;
+ saved_prev_match_offset = c->optimum[cur_pos].prev.match_offset;
+
+ do {
+ prev_link = saved_prev_link;
+ prev_match_offset = saved_prev_match_offset;
+
+ saved_prev_link = c->optimum[prev_link].prev.link;
+ saved_prev_match_offset = c->optimum[prev_link].prev.match_offset;
+
+ c->optimum[prev_link].next.link = cur_pos;
+ c->optimum[prev_link].next.match_offset = prev_match_offset;
+
+ cur_pos = prev_link;
+ } while (cur_pos != 0);
+
+ c->optimum_cur_idx = c->optimum[0].next.link;
+
+ return (struct lz_match)
+ { .len = c->optimum_cur_idx,
+ .offset = c->optimum[0].next.match_offset,
+ };
+}
+
+/*
+ * Near-optimal parsing.
+ *
+ * This does a forward lowest-cost path search. The search is terminated when a
+ * sufficiently long match is found, when the search reaches a position with no
+ * alternatives, or when the temporary 'optimum' array fills up. After
+ * termination of the search, matches/literals will be returned one by one by
+ * successive calls to this function. Once all the matches/literals are used
+ * up, the next call to this function will begin a new search.
+ */
+static struct lz_match
+xpress_choose_near_optimal_item(struct xpress_compressor *c)
+{
+ const struct lz_match *matches;
+ unsigned num_matches;
+ struct lz_match match;
+ unsigned cur_pos;
+ unsigned end_pos;
+ struct xpress_mc_pos_data * const optimum = c->optimum;
+
+ if (c->optimum_cur_idx != c->optimum_end_idx) {
+ /* Return previously computed match or literal. */
+ match.len = optimum[c->optimum_cur_idx].next.link -
+ c->optimum_cur_idx;
+ match.offset = optimum[c->optimum_cur_idx].next.match_offset;
+
+ c->optimum_cur_idx = optimum[c->optimum_cur_idx].next.link;
+ return match;
+ }
+
+ c->optimum_cur_idx = 0;
+ c->optimum_end_idx = 0;
+
+ num_matches = xpress_get_matches(c, &matches);
+
+ if (num_matches == 0)
+ return (struct lz_match) {};
+
+ if (matches[num_matches - 1].len >= c->params.nice_match_length) {
+ /* Take the long match immediately. */
+ xpress_skip_bytes(c, matches[num_matches - 1].len - 1);
+ return matches[num_matches - 1];
+ }
+
+ /* Consider coding a literal. */
+ optimum[1].cost = xpress_prev_literal_cost(c);
+ optimum[1].prev.link = 0;
+
+ optimum[2].cost = MC_INFINITE_COST;
+
+ {
+ /* Consider coding a match. Cost evaluation is hand-inlined so
+ * that we can do some performance hacks. */
+
+ unsigned i = 0;
+ unsigned len = 3;
+ struct xpress_mc_pos_data *optimum_ptr = &optimum[len];
+
+ if (matches[num_matches - 1].len < 0xf + XPRESS_MIN_MATCH_LEN) {
+ do {
+ u32 offset = matches[i].offset;
+ u32 offset_bsr = bsr32(offset);
+ unsigned len_hdr = len - XPRESS_MIN_MATCH_LEN;
+ unsigned sym = XPRESS_NUM_CHARS +
+ ((offset_bsr << 4) | len_hdr);
+ do {
+ optimum_ptr->prev.link = 0;
+ optimum_ptr->prev.match_offset = offset;
+ optimum_ptr->cost = offset_bsr + c->costs[sym];
+ sym++;
+ optimum_ptr++;
+ } while (++len <= matches[i].len);
+ } while (++i != num_matches);
+ } else {
+ do {
+ u32 offset = matches[i].offset;
+ u32 offset_bsr = bsr32(offset);
+ do {
+ u32 adjusted_len = len - XPRESS_MIN_MATCH_LEN;
+ unsigned len_hdr = min(adjusted_len, 0xf);
+ unsigned sym = XPRESS_NUM_CHARS +
+ ((offset_bsr << 4) | len_hdr);
+ u32 cost = offset_bsr + c->costs[sym];
+ if (adjusted_len >= 0xf) {
+ cost += 8;
+ if (adjusted_len - 0xf >= 0xff)
+ cost += 16;
+ }
+
+ optimum_ptr->prev.link = 0;
+ optimum_ptr->prev.match_offset = offset;
+ optimum_ptr->cost = cost;
+ optimum_ptr++;
+ } while (++len <= matches[i].len);
+ } while (++i != num_matches);
+ }
+ }
+
+ end_pos = matches[num_matches - 1].len;
+ cur_pos = 1;
+ do {
+ u32 cost;
+ u32 longest_len;
+
+ num_matches = xpress_get_matches(c, &matches);
+
+ if (num_matches) {
+ longest_len = matches[num_matches - 1].len;
+ if (longest_len >= c->params.nice_match_length) {
+ /* Take the long match immediately. */
+ match = xpress_match_chooser_reverse_list(c, cur_pos);
+
+ optimum[cur_pos].next.match_offset =
+ matches[num_matches - 1].offset;
+ optimum[cur_pos].next.link = cur_pos + longest_len;
+ c->optimum_end_idx = cur_pos + longest_len;
+
+ xpress_skip_bytes(c, longest_len - 1);
+
+ return match;
+ }
+ } else {
+ longest_len = 1;
+ }
+
+ while (end_pos < cur_pos + longest_len)
+ optimum[++end_pos].cost = MC_INFINITE_COST;
+
+ /* Consider coding a literal. */
+ cost = optimum[cur_pos].cost + xpress_prev_literal_cost(c);
+ if (cost < optimum[cur_pos + 1].cost) {
+ optimum[cur_pos + 1].cost = cost;
+ optimum[cur_pos + 1].prev.link = cur_pos;
+ }
+
+ if (num_matches) {
+ /* Consider coding a match. Cost evaluation is
+ * hand-inlined so that we can do some performance
+ * hacks. */
+ unsigned i = 0;
+ unsigned len = 3;
+ struct xpress_mc_pos_data *optimum_ptr = &optimum[cur_pos + 3];
+ u32 cur_cost = optimum[cur_pos].cost;
+
+ if (matches[num_matches - 1].len < 0xf + XPRESS_MIN_MATCH_LEN) {
+ do {
+ u32 offset = matches[i].offset;
+ u32 offset_bsr = bsr32(offset);
+ unsigned len_hdr = len - XPRESS_MIN_MATCH_LEN;
+ unsigned sym = XPRESS_NUM_CHARS +
+ ((offset_bsr << 4) | len_hdr);
+
+ u32 base_cost = cur_cost + offset_bsr;
+ do {
+ cost = base_cost + c->costs[sym];
+ if (cost < optimum_ptr->cost) {
+ optimum_ptr->prev.link = cur_pos;
+ optimum_ptr->prev.match_offset = offset;
+ optimum_ptr->cost = cost;
+ }
+ sym++;
+ optimum_ptr++;
+ } while (++len <= matches[i].len);
+ } while (++i != num_matches);
+ } else {
+ do {
+ u32 offset = matches[i].offset;
+ u32 offset_bsr = bsr32(offset);
+
+ u32 base_cost = cur_cost + offset_bsr;
+ do {
+ u32 adjusted_len = len - XPRESS_MIN_MATCH_LEN;
+ unsigned len_hdr = min(adjusted_len, 0xf);
+ unsigned sym = XPRESS_NUM_CHARS +
+ ((offset_bsr << 4) | len_hdr);
+
+ cost = base_cost + c->costs[sym];
+ if (adjusted_len >= 0xf) {
+ cost += 8;
+ if (adjusted_len - 0xf >= 0xff)
+ cost += 16;
+ }
+
+ if (cost < optimum_ptr->cost) {
+ optimum_ptr->prev.link = cur_pos;
+ optimum_ptr->prev.match_offset = offset;
+ optimum_ptr->cost = cost;
+ }
+ optimum_ptr++;
+ } while (++len <= matches[i].len);
+ } while (++i != num_matches);
+ }
+ }
+
+ cur_pos++;
+
+ } while (cur_pos != end_pos && cur_pos != XPRESS_OPTIM_ARRAY_LENGTH);
+
+ return xpress_match_chooser_reverse_list(c, cur_pos);
+}
+
+/* Set default XPRESS Huffman symbol costs to kick-start the iterative
+ * optimization algorithm. */
+static void
+xpress_set_default_costs(u8 costs[])
+{
+ unsigned i;
+
+ for (i = 0; i < XPRESS_NUM_CHARS; i++)
+ costs[i] = 8;
+
+ for (; i < XPRESS_NUM_SYMBOLS; i++)
+ costs[i] = 10;
+}
+
+/* Copy the Huffman codeword lengths array @lens to the Huffman symbol costs
+ * array @costs, but also assign a default cost to each 0-length (unused)
+ * codeword. */
+static void
+xpress_set_costs(u8 costs[], const u8 lens[])
+{
+ for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS; i++)
+ costs[i] = lens[i] ? lens[i] : XPRESS_MAX_CODEWORD_LEN;
+}
+
+/* Near-optimal parsing */
+static u32
+xpress_choose_items_near_optimal(struct xpress_compressor *c)
+{
+ u32 num_passes_remaining = c->params.num_optim_passes;
+ const u8 *window_ptr;
+ const u8 *window_end;
+ struct xpress_item *next_chosen_item;
+ struct lz_match raw_item;
+ struct xpress_item xpress_item;
+
+ xpress_set_default_costs(c->costs);
+ c->optimum_cur_idx = 0;
+ c->optimum_end_idx = 0;
+
+ if (c->params.num_optim_passes > 1) {
+ c->get_matches_func = xpress_get_matches_fillcache;
+ c->skip_bytes_func = xpress_skip_bytes_fillcache;
+ } else {
+ c->get_matches_func = xpress_get_matches_noncaching;
+ c->skip_bytes_func = xpress_skip_bytes_noncaching;
+ }
+
+ lz_mf_load_window(c->mf, c->cur_window, c->cur_window_size);
+
+ while (--num_passes_remaining) {
+ c->cur_window_ptr = c->cur_window;
+ window_ptr = c->cur_window;
+ window_end = window_ptr + c->cur_window_size;
+ c->cache_ptr = c->cached_matches;
+ memset(c->freqs, 0, sizeof(c->freqs));
+
+ while (window_ptr != window_end) {
+ raw_item = xpress_choose_near_optimal_item(c);
+ if (raw_item.len >= XPRESS_MIN_MATCH_LEN) {
+ xpress_tally_match(raw_item.len,
+ raw_item.offset, c->freqs);
+ window_ptr += raw_item.len;
+ } else {
+ xpress_tally_literal(*window_ptr, c->freqs);
+ window_ptr += 1;
+ }
+ }
+ c->freqs[XPRESS_END_OF_DATA]++;
+ xpress_make_huffman_code(c);
+ xpress_set_costs(c->costs, c->lens);
+ if (c->cache_ptr <= c->cache_limit) {
+ c->get_matches_func = xpress_get_matches_usecache_nocheck;
+ c->skip_bytes_func = xpress_skip_bytes_usecache_nocheck;
+ } else {
+ c->get_matches_func = xpress_get_matches_usecache;
+ c->skip_bytes_func = xpress_skip_bytes_usecache;