+static u32
+lzms_rc_costs[LZMS_PROBABILITY_MAX + 1];
+
+#ifdef LZMS_RC_COSTS_USE_FLOATING_POINT
+# include <math.h>
+#endif
+
+static void
+lzms_do_init_rc_costs(void)
+{
+ /* Fill in a table that maps range coding probabilities needed to code a
+ * bit X (0 or 1) to the number of bits (scaled by a constant factor, to
+ * handle fractional costs) needed to code that bit X.
+ *
+ * Consider the range of the range decoder. To eliminate exactly half
+ * the range (logical probability of 0.5), we need exactly 1 bit. For
+ * lower probabilities we need more bits and for higher probabilities we
+ * need fewer bits. In general, a logical probability of N will
+ * eliminate the proportion 1 - N of the range; this information takes
+ * log2(1 / N) bits to encode.
+ *
+ * The below loop is simply calculating this number of bits for each
+ * possible probability allowed by the LZMS compression format, but
+ * without using real numbers. To handle fractional probabilities, each
+ * cost is multiplied by (1 << LZMS_COST_SHIFT). These techniques are
+ * based on those used by LZMA.
+ *
+ * Note that in LZMS, a probability x really means x / 64, and 0 / 64 is
+ * really interpreted as 1 / 64 and 64 / 64 is really interpreted as
+ * 63 / 64.
+ */
+ for (u32 i = 0; i <= LZMS_PROBABILITY_MAX; i++) {
+ u32 prob = i;
+
+ if (prob == 0)
+ prob = 1;
+ else if (prob == LZMS_PROBABILITY_MAX)
+ prob = LZMS_PROBABILITY_MAX - 1;
+
+ #ifdef LZMS_RC_COSTS_USE_FLOATING_POINT
+ lzms_rc_costs[i] = log2((double)LZMS_PROBABILITY_MAX / prob) *
+ (1 << LZMS_COST_SHIFT);
+ #else
+ u32 w = prob;
+ u32 bit_count = 0;
+ for (u32 j = 0; j < LZMS_COST_SHIFT; j++) {
+ w *= w;
+ bit_count <<= 1;
+ while (w >= (1U << 16)) {
+ w >>= 1;
+ ++bit_count;
+ }
+ }
+ lzms_rc_costs[i] = (LZMS_PROBABILITY_BITS << LZMS_COST_SHIFT) -
+ (15 + bit_count);
+ #endif
+ }
+}
+
+static void
+lzms_init_rc_costs(void)
+{
+ static pthread_once_t once = PTHREAD_ONCE_INIT;
+
+ pthread_once(&once, lzms_do_init_rc_costs);
+}
+
+/*
+ * Return the cost to range-encode the specified bit when in the specified
+ * state.
+ *
+ * @enc The range encoder to use.
+ * @cur_state Current state, which indicates the probability entry to choose.
+ * Updated by this function.
+ * @bit The bit to encode (0 or 1).
+ */
+static u32
+lzms_rc_bit_cost(const struct lzms_range_encoder *enc, u8 *cur_state, int bit)
+{
+ u32 prob_zero;
+ u32 prob_correct;
+
+ prob_zero = enc->prob_entries[*cur_state & enc->mask].num_recent_zero_bits;
+
+ *cur_state = (*cur_state << 1) | bit;
+
+ if (bit == 0)
+ prob_correct = prob_zero;
+ else
+ prob_correct = LZMS_PROBABILITY_MAX - prob_zero;
+
+ return lzms_rc_costs[prob_correct];
+}
+
+static u32
+lzms_huffman_symbol_cost(const struct lzms_huffman_encoder *enc, u32 sym)
+{
+ return enc->lens[sym] << LZMS_COST_SHIFT;
+}
+
+static u32
+lzms_offset_cost(const struct lzms_huffman_encoder *enc, u32 offset)
+{
+ u32 slot;
+ u32 num_extra_bits;
+ u32 cost = 0;
+
+ slot = lzms_get_position_slot(offset);
+
+ cost += lzms_huffman_symbol_cost(enc, slot);
+
+ num_extra_bits = lzms_extra_position_bits[slot];
+
+ cost += num_extra_bits << LZMS_COST_SHIFT;
+
+ return cost;
+}
+
+static u32
+lzms_length_cost(const struct lzms_huffman_encoder *enc, u32 length)
+{
+ u32 slot;
+ u32 num_extra_bits;
+ u32 cost = 0;
+
+ slot = lzms_get_length_slot(length);
+
+ cost += lzms_huffman_symbol_cost(enc, slot);
+
+ num_extra_bits = lzms_extra_length_bits[slot];
+
+ cost += num_extra_bits << LZMS_COST_SHIFT;
+
+ return cost;
+}
+
+static u32
+lzms_get_matches(struct lzms_compressor *ctx,
+ const struct lzms_adaptive_state *state,
+ struct raw_match **matches_ret)
+{
+ *matches_ret = ctx->matches;
+ return lz_sarray_get_matches(&ctx->lz_sarray,
+ ctx->matches,
+ lzms_lz_match_cost_fast,
+ &state->lru);
+}
+
+static void
+lzms_skip_bytes(struct lzms_compressor *ctx, input_idx_t n)
+{
+ while (n--)
+ lz_sarray_skip_position(&ctx->lz_sarray);
+}
+
+static u32
+lzms_get_prev_literal_cost(struct lzms_compressor *ctx,
+ struct lzms_adaptive_state *state)
+{
+ u8 literal = ctx->window[lz_sarray_get_pos(&ctx->lz_sarray) - 1];
+ u32 cost = 0;
+
+ state->lru.upcoming_offset = 0;
+ lzms_update_lz_lru_queues(&state->lru);
+
+ cost += lzms_rc_bit_cost(&ctx->main_range_encoder,
+ &state->main_state, 0);
+
+ cost += lzms_huffman_symbol_cost(&ctx->literal_encoder, literal);
+
+ return cost;
+}
+
+static u32
+lzms_get_lz_match_cost(struct lzms_compressor *ctx,
+ struct lzms_adaptive_state *state,
+ input_idx_t length, input_idx_t offset)
+{
+ u32 cost = 0;
+ int recent_offset_idx;
+
+ cost += lzms_rc_bit_cost(&ctx->main_range_encoder,
+ &state->main_state, 1);
+ cost += lzms_rc_bit_cost(&ctx->match_range_encoder,
+ &state->match_state, 0);
+
+ for (recent_offset_idx = 0;
+ recent_offset_idx < LZMS_NUM_RECENT_OFFSETS;
+ recent_offset_idx++)
+ if (offset == state->lru.recent_offsets[recent_offset_idx])
+ break;
+
+ if (recent_offset_idx == LZMS_NUM_RECENT_OFFSETS) {
+ /* Explicit offset. */
+ cost += lzms_rc_bit_cost(&ctx->lz_match_range_encoder,
+ &state->lz_match_state, 0);
+
+ cost += lzms_offset_cost(&ctx->lz_offset_encoder, offset);
+ } else {
+ int i;
+
+ /* Recent offset. */
+ cost += lzms_rc_bit_cost(&ctx->lz_match_range_encoder,
+ &state->lz_match_state, 1);
+
+ for (i = 0; i < recent_offset_idx; i++)
+ cost += lzms_rc_bit_cost(&ctx->lz_repeat_match_range_encoders[i],
+ &state->lz_repeat_match_state[i], 0);
+
+ if (i < LZMS_NUM_RECENT_OFFSETS - 1)
+ cost += lzms_rc_bit_cost(&ctx->lz_repeat_match_range_encoders[i],
+ &state->lz_repeat_match_state[i], 1);
+
+
+ /* Initial update of the LZ match offset LRU queue. */
+ for (; i < LZMS_NUM_RECENT_OFFSETS; i++)
+ state->lru.recent_offsets[i] = state->lru.recent_offsets[i + 1];
+ }
+
+ cost += lzms_length_cost(&ctx->length_encoder, length);
+
+ state->lru.upcoming_offset = offset;
+ lzms_update_lz_lru_queues(&state->lru);
+
+ return cost;
+}
+
+static struct raw_match
+lzms_get_near_optimal_match(struct lzms_compressor *ctx)
+{
+ struct lzms_adaptive_state initial_state;
+
+ initial_state.lru = ctx->lru.lz;
+ initial_state.main_state = ctx->main_range_encoder.state;
+ initial_state.match_state = ctx->match_range_encoder.state;
+ initial_state.lz_match_state = ctx->lz_match_range_encoder.state;
+ for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS - 1; i++)
+ initial_state.lz_repeat_match_state[i] =
+ ctx->lz_repeat_match_range_encoders[i].state;
+ return lz_get_near_optimal_match(&ctx->mc,
+ lzms_get_matches,
+ lzms_skip_bytes,
+ lzms_get_prev_literal_cost,
+ lzms_get_lz_match_cost,
+ ctx,
+ &initial_state);
+}
+
+/*
+ * The main loop for the LZMS compressor.
+ *
+ * Notes:
+ *
+ * - This uses near-optimal LZ parsing backed by a suffix-array match-finder.
+ * More details can be found in the corresponding files (lz_optimal.h,
+ * lz_sarray.{h,c}).
+ *
+ * - This does not output any delta matches. It would take a specialized
+ * algorithm to find them, then more code in lz_optimal.h and here to handle
+ * evaluating and outputting them.
+ *
+ * - The costs of literals and matches are estimated using the range encoder
+ * states and the semi-adaptive Huffman codes. Except for range encoding
+ * states, costs are assumed to be constant throughout a single run of the
+ * parsing algorithm, which can parse up to @optim_array_length (from the
+ * `struct wimlib_lzms_compressor_params') bytes of data. This introduces a
+ * source of inaccuracy because the probabilities and Huffman codes can change
+ * over this part of the data.
+ */
+static void
+lzms_encode(struct lzms_compressor *ctx)
+{
+ struct raw_match match;
+
+ /* Load window into suffix array match-finder. */
+ lz_sarray_load_window(&ctx->lz_sarray, ctx->window, ctx->window_size);
+
+ /* Reset the match-chooser. */
+ lz_match_chooser_begin(&ctx->mc);
+
+ while (ctx->cur_window_pos != ctx->window_size) {
+ match = lzms_get_near_optimal_match(ctx);
+ if (match.len <= 1)
+ lzms_encode_literal(ctx, ctx->window[ctx->cur_window_pos]);
+ else
+ lzms_encode_lz_match(ctx, match.len, match.offset);
+ }