+ position_cost = cur_optimum_ptr->cost + offset_bsr;
+ do {
+ cost = position_cost + c->costs[sym];
+ if (cost < (cur_optimum_ptr + len)->cost) {
+ (cur_optimum_ptr + len)->cost = cost;
+ (cur_optimum_ptr + len)->mc_item_data =
+ (offset << MC_OFFSET_SHIFT) | len;
+ }
+ sym++;
+ } while (++len <= matches[i].len);
+ } while (++i != num_matches);
+ } else {
+ /* Some lengths are big. */
+ do {
+ offset = matches[i].offset;
+ offset_bsr = bsr32(offset);
+ position_cost = cur_optimum_ptr->cost + offset_bsr;
+ do {
+ adjusted_len = len - XPRESS_MIN_MATCH_LEN;
+ len_hdr = min(adjusted_len, 0xf);
+ sym = XPRESS_NUM_CHARS + ((offset_bsr << 4) | len_hdr);
+
+ cost = position_cost + c->costs[sym];
+ if (adjusted_len >= 0xf) {
+ cost += 8;
+ if (adjusted_len - 0xf >= 0xff)
+ cost += 16;
+ }
+
+ if (cost < (cur_optimum_ptr + len)->cost) {
+ (cur_optimum_ptr + len)->cost = cost;
+ (cur_optimum_ptr + len)->mc_item_data =
+ (offset << MC_OFFSET_SHIFT) | len;
+ }
+ } while (++len <= matches[i].len);
+ } while (++i != num_matches);
+ }
+}
+
+/*
+ * The main near-optimal parsing routine.
+ *
+ * Briefly, the algorithm does an approximate minimum-cost path search to find a
+ * "near-optimal" sequence of matches and literals to output, based on the
+ * current cost model. The algorithm steps forward, position by position (byte
+ * by byte), and updates the minimum cost path to reach each later position that
+ * can be reached using a match or literal from the current position. This is
+ * essentially Dijkstra's algorithm in disguise: the graph nodes are positions,
+ * the graph edges are possible matches/literals to code, and the cost of each
+ * edge is the estimated number of bits that will be required to output the
+ * corresponding match or literal. But one difference is that we actually
+ * compute the lowest-cost path in pieces, where each piece is terminated when
+ * there are no choices to be made.
+ *
+ * If next_chosen_item != NULL, then all items chosen will be recorded (saved in
+ * the chosen_items array). Otherwise, all items chosen will only be tallied
+ * (symbol frequencies tallied in c->freqs).
+ */
+static void
+xpress_optim_pass(struct xpress_compressor *c,
+ struct xpress_item **next_chosen_item)
+{
+ const u8 *window_end;
+ const u8 *window_ptr;
+ struct xpress_mc_pos_data *cur_optimum_ptr;
+ struct xpress_mc_pos_data *end_optimum_ptr;
+ const struct lz_match *matches;
+ unsigned num_matches;
+ unsigned longest_len;
+ unsigned literal;
+ u32 cost;
+
+ window_ptr = c->cur_window;
+ window_end = &c->cur_window[c->cur_window_size];
+
+begin:
+ /* Start building a new list of items, which will correspond to the next
+ * piece of the overall minimum-cost path. */
+
+ if (window_ptr == window_end)
+ return;
+
+ cur_optimum_ptr = c->optimum;
+ cur_optimum_ptr->cost = 0;
+ end_optimum_ptr = cur_optimum_ptr;
+
+ /* The following loop runs once for each per byte in the window, except
+ * in a couple shortcut cases. */
+ for (;;) {
+
+ /* Find matches with the current position. */
+ num_matches = xpress_get_matches(c, &matches);
+
+ if (num_matches) {
+
+ longest_len = matches[num_matches - 1].len;
+
+ /* If there's a very long match, choose it immediately.
+ */
+ if (longest_len >= c->params.nice_match_length) {
+
+ xpress_skip_bytes(c, longest_len - 1);
+ window_ptr += longest_len;
+
+ if (cur_optimum_ptr != c->optimum)
+ xpress_declare_item_list(c, cur_optimum_ptr,
+ next_chosen_item);
+
+ xpress_declare_match(c, longest_len,
+ matches[num_matches - 1].offset,
+ next_chosen_item);
+ goto begin;
+ }
+
+ /* If reaching any positions for the first time,
+ * initialize their costs to "infinity". */
+ while (end_optimum_ptr < cur_optimum_ptr + longest_len)
+ (++end_optimum_ptr)->cost = MC_INFINITE_COST;
+
+ /* Consider coding a match. */
+ xpress_consider_matches(c, cur_optimum_ptr,
+ matches, num_matches);
+ } else {
+ /* No matches found. The only choice at this position
+ * is to code a literal. */
+
+ if (end_optimum_ptr == cur_optimum_ptr) {
+ #if 1
+ /* Optimization for single literals. */
+ if (likely(cur_optimum_ptr == c->optimum)) {
+ xpress_declare_literal(c, *window_ptr++,
+ next_chosen_item);
+ if (window_ptr == window_end)
+ return;
+ continue;
+ }
+ #endif
+ (++end_optimum_ptr)->cost = MC_INFINITE_COST;
+ }
+ }
+
+ /* Consider coding a literal. */
+ literal = *window_ptr++;
+ cost = cur_optimum_ptr->cost + c->costs[literal];
+ if (cost < (cur_optimum_ptr + 1)->cost) {
+ (cur_optimum_ptr + 1)->cost = cost;
+ (cur_optimum_ptr + 1)->mc_item_data =
+ ((u32)literal << MC_OFFSET_SHIFT) | 1;
+ }
+
+ /* Advance to the next position. */
+ cur_optimum_ptr++;
+
+ /*
+ * This loop will terminate when either of the following
+ * conditions is true:
+ *
+ * (1) cur_optimum_ptr == end_optimum_ptr
+ *
+ * There are no paths that extend beyond the current
+ * position. In this case, any path to a later position
+ * must pass through the current position, so we can go
+ * ahead and choose the list of items that led to this
+ * position.
+ *
+ * (2) cur_optimum_ptr == &c->optimum[XPRESS_OPTIM_ARRAY_LENGTH]
+ *
+ * This bounds the number of times the algorithm can step
+ * forward before it is guaranteed to start choosing items.
+ * This limits the memory usage. But
+ * XPRESS_OPTIM_ARRAY_LENGTH is high enough that on most
+ * inputs this limit is never reached.
+ *
+ * Note: no check for end-of-window is needed because
+ * end-of-window will trigger condition (1).
+ */
+ if (cur_optimum_ptr == end_optimum_ptr ||
+ cur_optimum_ptr == &c->optimum[XPRESS_OPTIM_ARRAY_LENGTH])
+ break;
+ }
+
+ /* Choose the current list of items that constitute the minimum-cost
+ * path to the current position. */
+ xpress_declare_item_list(c, cur_optimum_ptr, next_chosen_item);
+ goto begin;
+}
+
+/* Near-optimal parsing */
+static u32
+xpress_choose_near_optimal_items(struct xpress_compressor *c)
+{
+ u32 num_passes_remaining = c->params.num_optim_passes;
+ struct xpress_item *next_chosen_item;
+ struct xpress_item **next_chosen_item_ptr;
+
+ /* Choose appropriate match-finder wrapper functions. */
+ if (c->params.num_optim_passes > 1) {
+ c->get_matches_func = xpress_get_matches_fillcache;
+ c->skip_bytes_func = xpress_skip_bytes_fillcache;
+ } else {
+ c->get_matches_func = xpress_get_matches_noncaching;
+ c->skip_bytes_func = xpress_skip_bytes_noncaching;
+ }
+
+ /* The first optimization pass will use a default cost model. Each
+ * additional optimization pass will use a cost model computed from the
+ * previous pass.