+/*
+ * Fast heuristic cost evaluation to use in the inner loop of the match-finder.
+ *
+ * Unlike lzms_get_lz_match_cost(), which does a true cost evaluation, this
+ * simply prioritizes matches based on their offset: an offset equal to entry i
+ * of lru->recent_offsets[] costs i, and any other offset costs the offset
+ * value itself.  @length does not influence the result.
+ */
+static input_idx_t
+lzms_lz_match_cost_fast(input_idx_t length, input_idx_t offset, const void *_lru)
+{
+	const struct lzms_lz_lru_queues *queues = _lru;
+	input_idx_t slot = 0;
+
+	while (slot < LZMS_NUM_RECENT_OFFSETS) {
+		if (queues->recent_offsets[slot] == offset)
+			return slot;
+		slot++;
+	}
+
+	return offset;
+}
+
+#define LZMS_COST_SHIFT 5	/* Costs are fixed-point: one whole bit
+				 * costs (1 << LZMS_COST_SHIFT) units.  */
+
+/* Define the following macro to compute the cost table with log2() from
+ * <math.h> instead of with integer arithmetic:
+ *
+ * #define LZMS_RC_COSTS_USE_FLOATING_POINT
+ */
+
+static u32
+lzms_rc_costs[LZMS_PROBABILITY_MAX + 1];	/* Indexed by probability;
+						 * filled in by
+						 * lzms_do_init_rc_costs().  */
+
+#ifdef LZMS_RC_COSTS_USE_FLOATING_POINT	/* log2() is only needed in this mode */
+# include <math.h>
+#endif
+
+/*
+ * Fill in lzms_rc_costs[], the table that maps each range coding probability
+ * to the scaled number of bits needed to code a bit having that probability.
+ *
+ * This does no locking of its own.
+ */
+static void
+lzms_do_init_rc_costs(void)
+{
+	/* Fill in a table that maps range coding probabilities needed to code a
+	 * bit X (0 or 1) to the number of bits (scaled by a constant factor, to
+	 * handle fractional costs) needed to code that bit X.
+	 *
+	 * Consider the range of the range decoder.  To eliminate exactly half
+	 * the range (logical probability of 0.5), we need exactly 1 bit.  For
+	 * lower probabilities we need more bits and for higher probabilities we
+	 * need fewer bits.  In general, a logical probability of N will
+	 * eliminate the proportion 1 - N of the range; this information takes
+	 * log2(1 / N) bits to encode.
+	 *
+	 * The below loop is simply calculating this number of bits for each
+	 * possible probability allowed by the LZMS compression format, but
+	 * without using real numbers.  To handle fractional costs, each cost
+	 * is multiplied by (1 << LZMS_COST_SHIFT).  These techniques are based
+	 * on those used by LZMA.
+	 *
+	 * Note that in LZMS, a probability x really means x / 64, and 0 / 64 is
+	 * really interpreted as 1 / 64 and 64 / 64 is really interpreted as
+	 * 63 / 64.
+	 */
+	for (u32 i = 0; i <= LZMS_PROBABILITY_MAX; i++) {
+		u32 prob = i;
+
+		/* Clamp the two extreme entries as described above.  */
+		if (prob == 0)
+			prob = 1;
+		else if (prob == LZMS_PROBABILITY_MAX)
+			prob = LZMS_PROBABILITY_MAX - 1;
+
+	#ifdef LZMS_RC_COSTS_USE_FLOATING_POINT
+		lzms_rc_costs[i] = log2((double)LZMS_PROBABILITY_MAX / prob) *
+				   (1 << LZMS_COST_SHIFT);
+	#else
+		/* Square 'w' LZMS_COST_SHIFT times, shifting it back below
+		 * 2^16 after each squaring so that the next squaring cannot
+		 * overflow 32 bits.  'bit_count' accumulates the discarded
+		 * powers of two, so that afterwards
+		 *
+		 *	prob^(1 << LZMS_COST_SHIFT) ~= w * 2^bit_count
+		 *
+		 * and hence
+		 *
+		 *	log2(prob) << LZMS_COST_SHIFT ~= log2(w) + bit_count.
+		 */
+		u32 w = prob;
+		u32 bit_count = 0;
+		for (u32 j = 0; j < LZMS_COST_SHIFT; j++) {
+			w *= w;
+			bit_count <<= 1;
+			while (w >= (1U << 16)) {
+				w >>= 1;
+				++bit_count;
+			}
+		}
+
+		/* Integer part of log2(w).  Whenever 'w' was normalized above
+		 * it ends up in [2^15, 2^16) and this is 15, the constant LZMA
+		 * hard-codes.  But for prob == 1, 'w' stays 1 and is never
+		 * normalized; assuming 15 there would make the two
+		 * least-probable entries 15 units too cheap.  */
+		u32 w_log2 = 0;
+		for (u32 t = w; t > 1; t >>= 1)
+			++w_log2;
+
+		lzms_rc_costs[i] = (LZMS_PROBABILITY_BITS << LZMS_COST_SHIFT) -
+				   (w_log2 + bit_count);
+	#endif
+	}
+}
+
+/*
+ * Make sure lzms_rc_costs[] has been filled in.
+ *
+ * May be called any number of times and from several threads concurrently;
+ * lzms_do_init_rc_costs() runs exactly once, and every caller returns only
+ * after that run has completed.  pthread_once() is used rather than testing a
+ * plain flag outside a mutex, since such an unlocked read races with the write
+ * and gives no guarantee that the table contents are visible to the reader.
+ */
+static void
+lzms_init_rc_costs(void)
+{
+	static pthread_once_t once = PTHREAD_ONCE_INIT;
+
+	pthread_once(&once, lzms_do_init_rc_costs);
+}
+
+/*
+ * Look up the cost of range-encoding the specified bit when in the specified
+ * state, and advance the state as if the bit had been coded.
+ *
+ * @enc		The range encoder whose probability entries are consulted.
+ * @cur_state	Current state; masked with enc->mask, it selects the
+ *		probability entry.  On return the state has been shifted
+ *		left by one with @bit placed in the low-order position.
+ * @bit		The bit to encode (0 or 1).
+ *
+ * Returns the lzms_rc_costs[] entry for the probability of @bit.
+ */
+static u32
+lzms_rc_bit_cost(const struct lzms_range_encoder *enc, u8 *cur_state, int bit)
+{
+	const u8 state = *cur_state;
+	const u32 zero_prob =
+		enc->prob_entries[state & enc->mask].num_recent_zero_bits;
+
+	*cur_state = (state << 1) | bit;
+
+	/* The probability of a 1 bit is the complement of that of a 0 bit.  */
+	return lzms_rc_costs[bit == 0 ? zero_prob
+				      : LZMS_PROBABILITY_MAX - zero_prob];
+}
+
+/*
+ * Return the scaled cost of coding symbol @sym with Huffman encoder @enc: the
+ * symbol's entry in enc->lens[], shifted left by LZMS_COST_SHIFT.
+ */
+static u32
+lzms_huffman_symbol_cost(const struct lzms_huffman_encoder *enc, u32 sym)
+{
+	const u32 codeword_len = enc->lens[sym];
+
+	return codeword_len << LZMS_COST_SHIFT;
+}
+
+/*
+ * Return the scaled cost of coding @offset with Huffman encoder @enc: the cost
+ * of the Huffman symbol for the offset's position slot, plus one whole-bit
+ * cost for each of the lzms_extra_position_bits[] listed for that slot.
+ */
+static u32
+lzms_offset_cost(const struct lzms_huffman_encoder *enc, u32 offset)
+{
+	const u32 pos_slot = lzms_get_position_slot(offset);
+	const u32 extra_bits = lzms_extra_position_bits[pos_slot];
+
+	return lzms_huffman_symbol_cost(enc, pos_slot) +
+	       (extra_bits << LZMS_COST_SHIFT);
+}