+ return lzms_rc_costs[prob_correct];
+}
+
+/* Return the cost to Huffman-encode the specified symbol. */
+static inline u32
+lzms_huffman_symbol_cost(const struct lzms_huffman_encoder *enc, unsigned sym)
+{
+ /* The cost of a Huffman symbol is just its codeword length, scaled to
+ * the compressor's fixed-point cost units. */
+ u32 codeword_len = enc->lens[sym];
+
+ return codeword_len << LZMS_COST_SHIFT;
+}
+
+/* Return the cost to encode the specified literal byte. */
+static inline u32
+lzms_literal_cost(const struct lzms_compressor *c, unsigned literal,
+ const struct lzms_adaptive_state *state)
+{
+ /* Cost of the main bit choosing "literal" (0), plus the cost of the
+ * literal's Huffman codeword. */
+ u32 cost = lzms_rc_bit_cost(&c->main_range_encoder, state->main_state, 0);
+
+ cost += lzms_huffman_symbol_cost(&c->literal_encoder, literal);
+ return cost;
+}
+
+/* Update the table that directly provides the costs for small lengths. */
+static void
+lzms_update_fast_length_costs(struct lzms_compressor *c)
+{
+ u32 len;
+ int slot = -1;
+ u32 cost = 0;
+
+ /* Walk lengths in increasing order.  'slot' lags behind as the length
+ * slot containing 'len', advancing each time 'len' reaches the next
+ * slot's base.  A slot's cost is its Huffman codeword length plus its
+ * number of extra (verbatim) length bits, scaled to fixed-point cost
+ * units; it is recomputed only when the slot changes.
+ * NOTE(review): 'slot' starts at -1, so this relies on
+ * lzms_length_slot_base[0] <= 1 to advance 'slot' to a valid index on
+ * the first iteration -- TODO confirm against the slot-base table. */
+ for (len = 1; len < LZMS_NUM_FAST_LENGTHS; len++) {
+
+ while (len >= lzms_length_slot_base[slot + 1]) {
+ slot++;
+ cost = (u32)(c->length_encoder.lens[slot] +
+ lzms_extra_length_bits[slot]) << LZMS_COST_SHIFT;
+ }
+
+ c->length_cost_fast[len] = cost;
+ }
+}
+
+/* Return the cost to encode the specified match length, which must be less than
+ * LZMS_NUM_FAST_LENGTHS. */
+static inline u32
+lzms_fast_length_cost(const struct lzms_compressor *c, u32 length)
+{
+ LZMS_ASSERT(length < LZMS_NUM_FAST_LENGTHS);
+ /* Direct lookup in the table filled in by
+ * lzms_update_fast_length_costs(). */
+ return c->length_cost_fast[length];
+}
+
+/* Return the cost to encode the specified LZ match offset. */
+static inline u32
+lzms_lz_offset_cost(const struct lzms_compressor *c, u32 offset)
+{
+ unsigned slot = lzms_get_offset_slot_fast(c, offset);
+ u32 num_bits = c->lz_offset_encoder.lens[slot] +
+ lzms_extra_offset_bits[slot];
+
+ /* Huffman codeword length for the offset slot, plus the slot's extra
+ * verbatim bits, in fixed-point cost units. */
+ return num_bits << LZMS_COST_SHIFT;
+}
+
+/*
+ * Consider coding the match at repeat offset index @rep_idx. Consider each
+ * length from the minimum (2) to the full match length (@rep_len).
+ */
+static inline void
+lzms_consider_lz_repeat_offset_match(const struct lzms_compressor *c,
+ struct lzms_mc_pos_data *cur_optimum_ptr,
+ u32 rep_len, unsigned rep_idx)
+{
+ u32 len;
+ u32 base_cost;
+ u32 cost;
+ unsigned i;
+
+ /* Start from the cost of the best known path to the current position,
+ * then add the cost of every bit that selects "repeat offset match at
+ * index @rep_idx"; this part is independent of the match length. */
+ base_cost = cur_optimum_ptr->cost;
+
+ /* Main bit: 1 => match (a literal would be 0). */
+ base_cost += lzms_rc_bit_cost(&c->main_range_encoder,
+ cur_optimum_ptr->state.main_state, 1);
+
+ /* Match bit: 0 => LZ match, not delta match. */
+ base_cost += lzms_rc_bit_cost(&c->match_range_encoder,
+ cur_optimum_ptr->state.match_state, 0);
+
+ /* LZ match bit: 1 => repeat offset, not explicit offset. */
+ base_cost += lzms_rc_bit_cost(&c->lz_match_range_encoder,
+ cur_optimum_ptr->state.lz_match_state, 1);
+
+ /* One 1 bit for each repeat index skipped before @rep_idx... */
+ for (i = 0; i < rep_idx; i++)
+ base_cost += lzms_rc_bit_cost(&c->lz_repeat_match_range_encoders[i],
+ cur_optimum_ptr->state.lz_repeat_match_state[i], 1);
+
+ /* ...then a terminating 0 bit, unless @rep_idx is the last index. */
+ if (i < LZMS_NUM_RECENT_OFFSETS - 1)
+ base_cost += lzms_rc_bit_cost(&c->lz_repeat_match_range_encoders[i],
+ cur_optimum_ptr->state.lz_repeat_match_state[i], 0);
+
+ /* For each candidate length, record this match at the reached position
+ * if it beats the best cost found so far for that position. */
+ len = 2;
+ do {
+ cost = base_cost + lzms_fast_length_cost(c, len);
+ if (cost < (cur_optimum_ptr + len)->cost) {
+ (cur_optimum_ptr + len)->mc_item_data =
+ ((u64)rep_idx << MC_OFFSET_SHIFT) | len;
+ (cur_optimum_ptr + len)->cost = cost;
+ }
+ } while (++len <= rep_len);
+}
+
+/*
+ * Consider coding each match in @matches as an explicit offset match.
+ *
+ * @matches must be sorted by strictly increasing length and strictly increasing
+ * offset. This is guaranteed by the match-finder.
+ *
+ * We consider each length from the minimum (2) to the longest
+ * (matches[num_matches - 1].len). For each length, we consider only the
+ * smallest offset for which that length is available. Although this is not
+ * guaranteed to be optimal due to the possibility of a larger offset costing
+ * less than a smaller offset to code, this is a very useful heuristic.
+ */
+static inline void
+lzms_consider_lz_explicit_offset_matches(const struct lzms_compressor *c,
+ struct lzms_mc_pos_data *cur_optimum_ptr,
+ const struct lz_match matches[],
+ u32 num_matches)
+{
+ u32 len;
+ u32 i;
+ u32 base_cost;
+ u32 position_cost;
+ u32 cost;
+
+ /* Length-independent cost: best path to the current position plus the
+ * bits that select "explicit offset LZ match". */
+ base_cost = cur_optimum_ptr->cost;
+
+ /* Main bit: 1 => match (a literal would be 0). */
+ base_cost += lzms_rc_bit_cost(&c->main_range_encoder,
+ cur_optimum_ptr->state.main_state, 1);
+
+ /* Match bit: 0 => LZ match, not delta match. */
+ base_cost += lzms_rc_bit_cost(&c->match_range_encoder,
+ cur_optimum_ptr->state.match_state, 0);
+
+ /* LZ match bit: 0 => explicit offset, not repeat offset. */
+ base_cost += lzms_rc_bit_cost(&c->lz_match_range_encoder,
+ cur_optimum_ptr->state.lz_match_state, 0);
+ len = 2;
+ i = 0;
+ do {
+ /* Since @matches is sorted by increasing length and offset,
+ * matches[i].offset is the smallest offset providing lengths
+ * up to matches[i].len; its cost is hoisted out of the inner
+ * loop. */
+ position_cost = base_cost + lzms_lz_offset_cost(c, matches[i].offset);
+ do {
+ cost = position_cost + lzms_fast_length_cost(c, len);
+ if (cost < (cur_optimum_ptr + len)->cost) {
+ (cur_optimum_ptr + len)->mc_item_data =
+ ((u64)(matches[i].offset + LZMS_OFFSET_OFFSET)
+ << MC_OFFSET_SHIFT) | len;
+ (cur_optimum_ptr + len)->cost = cost;
+ }
+ } while (++len <= matches[i].len);
+ } while (++i != num_matches);
+}
+
+/* Reset the adaptive state: empty LRU offset queue and all range-coding
+ * states zeroed. */
+static void
+lzms_init_adaptive_state(struct lzms_adaptive_state *state)
+{
+ unsigned idx;
+
+ lzms_init_lz_lru_queue(&state->lru);
+
+ state->main_state = 0;
+ state->match_state = 0;
+ state->lz_match_state = 0;
+ for (idx = 0; idx < LZMS_NUM_RECENT_OFFSETS - 1; idx++)
+ state->lz_repeat_match_state[idx] = 0;
+}
+
+/* Shift the new main bit (0 = literal, 1 = match) into the main state,
+ * wrapping to the valid state range. */
+static inline void
+lzms_update_main_state(struct lzms_adaptive_state *state, int is_match)
+{
+ u32 next = (state->main_state << 1) | is_match;
+
+ state->main_state = next % LZMS_NUM_MAIN_STATES;
+}
+
+/* Shift the new match bit (0 = LZ, 1 = delta) into the match state,
+ * wrapping to the valid state range. */
+static inline void
+lzms_update_match_state(struct lzms_adaptive_state *state, int is_delta)
+{
+ u32 next = (state->match_state << 1) | is_delta;
+
+ state->match_state = next % LZMS_NUM_MATCH_STATES;
+}
+
+/* Shift the new LZ match bit (0 = explicit offset, 1 = repeat offset) into
+ * the LZ match state, wrapping to the valid state range. */
+static inline void
+lzms_update_lz_match_state(struct lzms_adaptive_state *state, int is_repeat_offset)
+{
+ u32 next = (state->lz_match_state << 1) | is_repeat_offset;
+
+ state->lz_match_state = next % LZMS_NUM_LZ_MATCH_STATES;
+}
+
+/* Update the per-index repeat match states after coding a repeat offset match
+ * with index @rep_idx: a 1 bit was coded for each index before @rep_idx, then
+ * a 0 bit unless @rep_idx was the last possible index.  This mirrors the bit
+ * sequence whose cost lzms_consider_lz_repeat_offset_match() estimates. */
+static inline void
+lzms_update_lz_repeat_match_state(struct lzms_adaptive_state *state, int rep_idx)
+{
+ int i;
+
+ for (i = 0; i < rep_idx; i++)
+ state->lz_repeat_match_state[i] =
+ ((state->lz_repeat_match_state[i] << 1) | 1) %
+ LZMS_NUM_LZ_REPEAT_MATCH_STATES;
+
+ if (i < LZMS_NUM_RECENT_OFFSETS - 1)
+ state->lz_repeat_match_state[i] =
+ ((state->lz_repeat_match_state[i] << 1) | 0) %
+ LZMS_NUM_LZ_REPEAT_MATCH_STATES;
+}
+
+/*
+ * The main near-optimal parsing routine.
+ *
+ * Briefly, the algorithm does an approximate minimum-cost path search to find a
+ * "near-optimal" sequence of matches and literals to output, based on the
+ * current cost model. The algorithm steps forward, position by position (byte
+ * by byte), and updates the minimum cost path to reach each later position that
+ * can be reached using a match or literal from the current position. This is
+ * essentially Dijkstra's algorithm in disguise: the graph nodes are positions,
+ * the graph edges are possible matches/literals to code, and the cost of each
+ * edge is the estimated number of bits that will be required to output the
+ * corresponding match or literal. But one difference is that we actually
+ * compute the lowest-cost path in pieces, where each piece is terminated when
+ * there are no choices to be made.
+ *
+ * Notes:
+ *
+ * - This does not output any delta matches.
+ *
+ * - The costs of literals and matches are estimated using the range encoder
+ * states and the semi-adaptive Huffman codes. Except for range encoding
+ * states, costs are assumed to be constant throughout a single run of the
+ * parsing algorithm, which can parse up to @optim_array_length bytes of data.
+ * This introduces a source of inaccuracy because the probabilities and
+ * Huffman codes can change over this part of the data.
+ */
+static void
+lzms_near_optimal_parse(struct lzms_compressor *c)
+{
+ const u8 *window_ptr;
+ const u8 *window_end;
+ struct lzms_mc_pos_data *cur_optimum_ptr;
+ struct lzms_mc_pos_data *end_optimum_ptr;
+ u32 num_matches;
+ u32 longest_len;
+ u32 rep_max_len;
+ unsigned rep_max_idx;
+ unsigned literal;
+ unsigned i;
+ u32 cost;
+ u32 len;
+ u32 offset_data;
+
+ window_ptr = c->cur_window;
+ window_end = window_ptr + c->cur_window_size;
+
+ lzms_init_adaptive_state(&c->optimum[0].state);
+
+begin:
+ /* Start building a new list of items, which will correspond to the next
+ * piece of the overall minimum-cost path. */
+
+ cur_optimum_ptr = c->optimum;
+ cur_optimum_ptr->cost = 0;
+ end_optimum_ptr = cur_optimum_ptr;
+
+ /* States should currently be consistent with the encoders. */
+ LZMS_ASSERT(cur_optimum_ptr->state.main_state == c->main_range_encoder.state);
+ LZMS_ASSERT(cur_optimum_ptr->state.match_state == c->match_range_encoder.state);
+ LZMS_ASSERT(cur_optimum_ptr->state.lz_match_state == c->lz_match_range_encoder.state);
+ for (i = 0; i < LZMS_NUM_RECENT_OFFSETS - 1; i++)
+ LZMS_ASSERT(cur_optimum_ptr->state.lz_repeat_match_state[i] ==
+ c->lz_repeat_match_range_encoders[i].state);
+
+ if (window_ptr == window_end)
+ return;
+
+ /* The following loop runs once per byte in the window, except in
+ * a couple of shortcut cases. */
+ for (;;) {
+
+ /* Find explicit offset matches with the current position. */
+ num_matches = lz_mf_get_matches(c->mf, c->matches);
+
+ if (num_matches) {
+ /*
+ * Find the longest repeat offset match with the current
+ * position.
+ *
+ * Heuristics:
+ *
+ * - Only search for repeat offset matches if the
+ * match-finder already found at least one match.
+ *
+ * - Only consider the longest repeat offset match. It
+ * seems to be rare for the optimal parse to include a
+ * repeat offset match that doesn't have the longest
+ * length (allowing for the possibility that not all
+ * of that length is actually used).
+ */
+ if (likely(window_ptr - c->cur_window >= LZMS_MAX_INIT_RECENT_OFFSET)) {
+ BUILD_BUG_ON(LZMS_NUM_RECENT_OFFSETS != 3);
+ rep_max_len = lz_repsearch3(window_ptr,
+ window_end - window_ptr,
+ cur_optimum_ptr->state.lru.recent_offsets,
+ &rep_max_idx);
+ } else {
+ rep_max_len = 0;
+ }
+
+ if (rep_max_len) {
+ /* If there's a very long repeat offset match,
+ * choose it immediately. */
+ if (rep_max_len >= c->params.nice_match_length) {
+
+ lz_mf_skip_positions(c->mf, rep_max_len - 1);
+ window_ptr += rep_max_len;
+
+ if (cur_optimum_ptr != c->optimum)
+ lzms_encode_item_list(c, cur_optimum_ptr);
+
+ lzms_encode_lz_repeat_offset_match(c, rep_max_len,
+ rep_max_idx);
+
+ c->optimum[0].state = cur_optimum_ptr->state;
+
+ lzms_update_main_state(&c->optimum[0].state, 1);
+ lzms_update_match_state(&c->optimum[0].state, 0);
+ lzms_update_lz_match_state(&c->optimum[0].state, 1);
+ lzms_update_lz_repeat_match_state(&c->optimum[0].state,
+ rep_max_idx);
+
+ c->optimum[0].state.lru.upcoming_offset =
+ c->optimum[0].state.lru.recent_offsets[rep_max_idx];
+
+ for (i = rep_max_idx; i < LZMS_NUM_RECENT_OFFSETS; i++)
+ c->optimum[0].state.lru.recent_offsets[i] =
+ c->optimum[0].state.lru.recent_offsets[i + 1];
+
+ lzms_update_lz_lru_queue(&c->optimum[0].state.lru);
+ goto begin;
+ }
+
+ /* If reaching any positions for the first time,
+ * initialize their costs to "infinity". */
+ while (end_optimum_ptr < cur_optimum_ptr + rep_max_len)
+ (++end_optimum_ptr)->cost = MC_INFINITE_COST;
+
+ /* Consider coding a repeat offset match. */
+ lzms_consider_lz_repeat_offset_match(c, cur_optimum_ptr,
+ rep_max_len, rep_max_idx);
+ }
+
+ longest_len = c->matches[num_matches - 1].len;
+
+ /* If there's a very long explicit offset match, choose
+ * it immediately. */
+ if (longest_len >= c->params.nice_match_length) {
+
+ lz_mf_skip_positions(c->mf, longest_len - 1);
+ window_ptr += longest_len;
+
+ if (cur_optimum_ptr != c->optimum)
+ lzms_encode_item_list(c, cur_optimum_ptr);
+
+ lzms_encode_lz_explicit_offset_match(c, longest_len,
+ c->matches[num_matches - 1].offset);
+
+ c->optimum[0].state = cur_optimum_ptr->state;
+
+ lzms_update_main_state(&c->optimum[0].state, 1);
+ lzms_update_match_state(&c->optimum[0].state, 0);
+ lzms_update_lz_match_state(&c->optimum[0].state, 0);
+
+ c->optimum[0].state.lru.upcoming_offset =
+ c->matches[num_matches - 1].offset;
+
+ lzms_update_lz_lru_queue(&c->optimum[0].state.lru);
+ goto begin;
+ }
+
+ /* If reaching any positions for the first time,
+ * initialize their costs to "infinity". */
+ while (end_optimum_ptr < cur_optimum_ptr + longest_len)
+ (++end_optimum_ptr)->cost = MC_INFINITE_COST;
+
+ /* Consider coding an explicit offset match. */
+ lzms_consider_lz_explicit_offset_matches(c, cur_optimum_ptr,
+ c->matches, num_matches);
+ } else {
+ /* No matches found. The only choice at this position
+ * is to code a literal. */
+
+ if (end_optimum_ptr == cur_optimum_ptr)
+ (++end_optimum_ptr)->cost = MC_INFINITE_COST;
+ }
+
+ /* Consider coding a literal.
+
+ * To avoid an extra unpredictable branch, actually checking the
+ * preferability of coding a literal is integrated into the
+ * adaptive state update code below. */
+ literal = *window_ptr++;
+ cost = cur_optimum_ptr->cost +
+ lzms_literal_cost(c, literal, &cur_optimum_ptr->state);
+
+ /* Advance to the next position. */
+ cur_optimum_ptr++;
+
+ /* The lowest-cost path to the current position is now known.
+ * Finalize the adaptive state that results from taking this
+ * lowest-cost path. */
+
+ if (cost < cur_optimum_ptr->cost) {
+ /* Literal */
+ cur_optimum_ptr->cost = cost;
+ cur_optimum_ptr->mc_item_data = ((u64)literal << MC_OFFSET_SHIFT) | 1;
+
+ cur_optimum_ptr->state = (cur_optimum_ptr - 1)->state;
+
+ lzms_update_main_state(&cur_optimum_ptr->state, 0);
+
+ cur_optimum_ptr->state.lru.upcoming_offset = 0;
+ } else {
+ /* LZ match */
+ len = cur_optimum_ptr->mc_item_data & MC_LEN_MASK;
+ offset_data = cur_optimum_ptr->mc_item_data >> MC_OFFSET_SHIFT;
+
+ cur_optimum_ptr->state = (cur_optimum_ptr - len)->state;
+
+ lzms_update_main_state(&cur_optimum_ptr->state, 1);
+ lzms_update_match_state(&cur_optimum_ptr->state, 0);
+
+ if (offset_data >= LZMS_NUM_RECENT_OFFSETS) {
+
+ /* Explicit offset LZ match */
+
+ lzms_update_lz_match_state(&cur_optimum_ptr->state, 0);
+
+ cur_optimum_ptr->state.lru.upcoming_offset =
+ offset_data - LZMS_OFFSET_OFFSET;
+ } else {
+ /* Repeat offset LZ match */
+
+ lzms_update_lz_match_state(&cur_optimum_ptr->state, 1);
+ lzms_update_lz_repeat_match_state(&cur_optimum_ptr->state,
+ offset_data);
+
+ cur_optimum_ptr->state.lru.upcoming_offset =
+ cur_optimum_ptr->state.lru.recent_offsets[offset_data];
+
+ for (i = offset_data; i < LZMS_NUM_RECENT_OFFSETS; i++)
+ cur_optimum_ptr->state.lru.recent_offsets[i] =
+ cur_optimum_ptr->state.lru.recent_offsets[i + 1];
+ }
+ }
+
+ lzms_update_lz_lru_queue(&cur_optimum_ptr->state.lru);
+
+ /*
+ * This loop will terminate when either of the following
+ * conditions is true:
+ *
+ * (1) cur_optimum_ptr == end_optimum_ptr
+ *
+ * There are no paths that extend beyond the current
+ * position. In this case, any path to a later position
+ * must pass through the current position, so we can go
+ * ahead and choose the list of items that led to this
+ * position.
+ *
+ * (2) cur_optimum_ptr == c->optimum_end
+ *
+ * This bounds the number of times the algorithm can step
+ * forward before it is guaranteed to start choosing items.
+ * This limits the memory usage. It also guarantees that
+ * the parser will not go too long without updating the
+ * probability tables.
+ *
+ * Note: no check for end-of-window is needed because
+ * end-of-window will trigger condition (1).
+ */
+ if (cur_optimum_ptr == end_optimum_ptr ||
+ cur_optimum_ptr == c->optimum_end)
+ {
+ c->optimum[0].state = cur_optimum_ptr->state;
+ break;
+ }
+ }
+
+ /* Output the current list of items that constitute the minimum-cost
+ * path to the current position. */
+ lzms_encode_item_list(c, cur_optimum_ptr);
+ goto begin;