+ freqs->main[main_symbol]++;
+
+ /* In an aligned offset block, 3 bits of the position footer are output
+ * as an aligned offset symbol. Account for this, although we may
+ * ultimately decide to output the block as verbatim. */
+
+ /* The following check is equivalent to:
+ *
+ * if (lzx_extra_bits[position_slot] >= 3)
+ *
+ * Note that this correctly excludes position slots that correspond to
+ * recent offsets. */
+ if (position_slot >= 8)
+ freqs->aligned[position_footer & 7]++;
+
+ /* Pack the position slot, position footer, and match length into an
+ * intermediate representation. See `struct lzx_match' for details.
+ */
+ LZX_ASSERT(LZX_MAX_POSITION_SLOTS <= 64);
+ LZX_ASSERT(lzx_get_num_extra_bits(LZX_MAX_POSITION_SLOTS - 1) <= 17);
+ LZX_ASSERT(LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1 <= 256);
+
+ LZX_ASSERT(position_slot <= (1U << (31 - 25)) - 1);
+ LZX_ASSERT(position_footer <= (1U << (25 - 8)) - 1);
+ LZX_ASSERT(adjusted_match_len <= (1U << (8 - 0)) - 1);
+ return 0x80000000 |
+ (position_slot << 25) |
+ (position_footer << 8) |
+ (adjusted_match_len);
+}
+
+/* Context shared by the match/literal recording callbacks
+ * (lzx_record_match() and lzx_record_literal()) used by the fast
+ * compression path. */
+struct lzx_record_ctx {
+ struct lzx_freqs freqs;	/* symbol frequency counters */
+ struct lzx_lru_queue queue;	/* match offset LRU queue */
+ struct lzx_match *matches;	/* next free slot in the output match array */
+};
+
+/* Match-recording callback: tally the match into the frequency counters and
+ * LRU queue, then append its intermediate representation to the output
+ * match array. */
+static void
+lzx_record_match(unsigned len, unsigned offset, void *_ctx)
+{
+	struct lzx_record_ctx *ctx = _ctx;
+	struct lzx_match *slot = ctx->matches;
+
+	slot->data = lzx_tally_match(len, offset, &ctx->freqs, &ctx->queue);
+	ctx->matches = slot + 1;
+}
+
+/* Literal-recording callback: tally the literal byte into the frequency
+ * counters, then append its intermediate representation to the output
+ * match array. */
+static void
+lzx_record_literal(u8 lit, void *_ctx)
+{
+	struct lzx_record_ctx *ctx = _ctx;
+	struct lzx_match *slot = ctx->matches;
+
+	slot->data = lzx_tally_literal(lit, &ctx->freqs);
+	ctx->matches = slot + 1;
+}
+
+/* Returns the cost, in bits, to output the literal byte @c using the
+ * specified cost model (its codeword cost in the main code). */
+static unsigned
+lzx_literal_cost(u8 c, const struct lzx_costs * costs)
+{
+	unsigned cost = costs->main[c];
+
+	return cost;
+}
+
+/* Given a (length, offset) pair that could be turned into a valid LZX match
+ * as well as costs for the codewords in the main, length, and aligned Huffman
+ * codes, return the approximate number of bits it will take to represent this
+ * match in the compressed output.  Take into account the match offset LRU
+ * queue and optionally update it. */
+static unsigned
+lzx_match_cost(unsigned length, unsigned offset, const struct lzx_costs *costs,
+	       struct lzx_lru_queue *queue)
+{
+	unsigned position_slot;
+	unsigned len_header, main_symbol;
+	unsigned num_extra_bits;
+	unsigned cost = 0;
+
+	position_slot = lzx_get_position_slot(offset, queue);
+
+	len_header = min(length - LZX_MIN_MATCH_LEN, LZX_NUM_PRIMARY_LENS);
+	main_symbol = ((position_slot << 3) | len_header) + LZX_NUM_CHARS;
+
+	/* Account for main symbol. */
+	cost += costs->main[main_symbol];
+
+	/* Account for extra position information.  When there are at least 3
+	 * extra bits, the low 3 bits are covered by an aligned offset symbol
+	 * rather than being output verbatim. */
+	num_extra_bits = lzx_get_num_extra_bits(position_slot);
+	if (num_extra_bits >= 3) {
+		cost += num_extra_bits - 3;
+		cost += costs->aligned[(offset + LZX_OFFSET_OFFSET) & 7];
+	} else {
+		cost += num_extra_bits;
+	}
+
+	/* Account for extra length information. */
+	if (len_header == LZX_NUM_PRIMARY_LENS)
+		cost += costs->len[length - LZX_MIN_MATCH_LEN - LZX_NUM_PRIMARY_LENS];
+
+	return cost;
+}
+
+/* Fast heuristic cost evaluation to use in the inner loop of the
+ * match-finder.  Unlike lzx_match_cost(), which does a true cost evaluation,
+ * this simply prioritizes matches based on their offset: a repeat offset
+ * ranks by its queue index, and any other offset ranks by its raw value. */
+static input_idx_t
+lzx_match_cost_fast(input_idx_t length, input_idx_t offset, const void *_queue)
+{
+	const struct lzx_lru_queue *queue = _queue;
+	input_idx_t slot;
+
+	/* It seems well worth it to take the time to give priority to
+	 * recently used offsets. */
+	for (slot = 0; slot < LZX_NUM_RECENT_OFFSETS; slot++)
+		if (queue->R[slot] == offset)
+			return slot;
+
+	return offset;
+}
+
+/* Set the cost model @ctx->costs from the Huffman codeword lengths specified
+ * in @lens.
+ *
+ * The cost model and codeword lengths are almost the same thing, but the
+ * Huffman codewords with length 0 correspond to symbols with zero frequency
+ * that still need to be assigned actual costs.  The specific values assigned
+ * are arbitrary, but they should be fairly high (near the maximum codeword
+ * length) to take into account the fact that uses of these symbols are
+ * expected to be rare. */
+static void
+lzx_set_costs(struct lzx_compressor * ctx, const struct lzx_lens * lens)
+{
+	const unsigned num_main_syms = ctx->num_main_syms;
+	unsigned i;
+
+	/* Main code */
+	for (i = 0; i < num_main_syms; i++)
+		ctx->costs.main[i] = lens->main[i] ? lens->main[i] :
+			ctx->params.alg_params.slow.main_nostat_cost;
+
+	/* Length code */
+	for (i = 0; i < LZX_LENCODE_NUM_SYMBOLS; i++)
+		ctx->costs.len[i] = lens->len[i] ? lens->len[i] :
+			ctx->params.alg_params.slow.len_nostat_cost;
+
+	/* Aligned offset code */
+	for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++)
+		ctx->costs.aligned[i] = lens->aligned[i] ? lens->aligned[i] :
+			ctx->params.alg_params.slow.aligned_nostat_cost;
+}
+
+/* Tell the match-finder to skip the specified number of bytes (@n) in the
+ * input.  On a cached pass, this advances past the cached match lists for
+ * those positions; otherwise it records empty match lists for them and
+ * advances the suffix-array match-finder. */
+static void
+lzx_lz_skip_bytes(struct lzx_compressor *ctx, input_idx_t n)
+{
+ LZX_ASSERT(n <= ctx->match_window_end - ctx->match_window_pos);
+ if (ctx->matches_cached) {
+ ctx->match_window_pos += n;
+ while (n--) {
+ /* Each cached entry is a match count followed by that many
+  * matches; skip the whole list for this position. */
+ ctx->cached_matches_pos +=
+ ctx->cached_matches[ctx->cached_matches_pos].len + 1;
+ }
+ } else {
+ while (n--) {
+ /* Cache an empty match list (count 0) so that later cached
+  * passes skip this position too. */
+ ctx->cached_matches[ctx->cached_matches_pos++].len = 0;
+ lz_sarray_skip_position(&ctx->lz_sarray);
+ ctx->match_window_pos++;
+ }
+ LZX_ASSERT(lz_sarray_get_pos(&ctx->lz_sarray) == ctx->match_window_pos);
+ }
+}
+
+/* Retrieve a list of matches available at the next position in the input.
+ *
+ * A pointer to the matches array is written into @matches_ret, and the return
+ * value is the number of matches found. */
+static u32
+lzx_lz_get_matches_caching(struct lzx_compressor *ctx,
+ const struct lzx_lru_queue *queue,
+ struct raw_match **matches_ret)
+{
+ u32 num_matches;
+ struct raw_match *matches;
+
+ LZX_ASSERT(ctx->match_window_pos <= ctx->match_window_end);
+
+ /* Each cached entry is a slot holding the match count, immediately
+  * followed by that many matches; the match list for this position
+  * starts just past the count slot. */
+ matches = &ctx->cached_matches[ctx->cached_matches_pos + 1];
+
+ if (ctx->matches_cached) {
+ /* On a cached pass, the count was stored in the slot preceding
+  * the match list by an earlier pass. */
+ num_matches = matches[-1].len;
+ } else {
+ LZX_ASSERT(lz_sarray_get_pos(&ctx->lz_sarray) == ctx->match_window_pos);
+ num_matches = lz_sarray_get_matches(&ctx->lz_sarray,
+ matches,
+ lzx_match_cost_fast,
+ queue);
+ /* Record the count so later passes can replay this list. */
+ matches[-1].len = num_matches;
+ }
+ ctx->cached_matches_pos += num_matches + 1;
+ *matches_ret = matches;
+
+ /* Cap the length of returned matches to the number of bytes remaining,
+ * if it is not the whole window. */
+ if (ctx->match_window_end < ctx->window_size) {
+ unsigned maxlen = ctx->match_window_end - ctx->match_window_pos;
+ for (u32 i = 0; i < num_matches; i++)
+ if (matches[i].len > maxlen)
+ matches[i].len = maxlen;
+ }
+#if 0
+ fprintf(stderr, "Pos %u/%u: %u matches\n",
+ ctx->match_window_pos, ctx->match_window_end, num_matches);
+ for (unsigned i = 0; i < num_matches; i++)
+ fprintf(stderr, "\tLen %u Offset %u\n", matches[i].len, matches[i].offset);
+#endif
+
+#ifdef ENABLE_LZX_DEBUG
+ for (u32 i = 0; i < num_matches; i++) {
+ LZX_ASSERT(matches[i].len >= LZX_MIN_MATCH_LEN);
+ LZX_ASSERT(matches[i].len <= LZX_MAX_MATCH_LEN);
+ LZX_ASSERT(matches[i].len <= ctx->match_window_end - ctx->match_window_pos);
+ LZX_ASSERT(matches[i].offset > 0);
+ LZX_ASSERT(matches[i].offset <= ctx->match_window_pos);
+ LZX_ASSERT(!memcmp(&ctx->window[ctx->match_window_pos],
+ &ctx->window[ctx->match_window_pos - matches[i].offset],
+ matches[i].len));
+ }
+#endif
+
+ ctx->match_window_pos++;
+ return num_matches;
+}
+
+/* Match-chooser callback: return the cost of outputting, as a literal, the
+ * byte just before the current match-finder position.  The LRU queue is not
+ * consulted for literals. */
+static u32
+lzx_get_prev_literal_cost(struct lzx_compressor *ctx,
+			  struct lzx_lru_queue *queue)
+{
+	u8 lit = ctx->window[ctx->match_window_pos - 1];
+
+	return lzx_literal_cost(lit, &ctx->costs);
+}
+
+/* Match-chooser callback: return the cost of outputting the given match
+ * under the current cost model, updating the offset LRU queue. */
+static u32
+lzx_get_match_cost(struct lzx_compressor *ctx,
+		   struct lzx_lru_queue *queue,
+		   input_idx_t length, input_idx_t offset)
+{
+	u32 cost = lzx_match_cost(length, offset, &ctx->costs, queue);
+
+	return cost;
+}
+
+/* Choose the match or literal to output at the current position, using the
+ * generic near-optimal match chooser wired up with this compressor's caching
+ * match-finder and cost-model callbacks. */
+static struct raw_match
+lzx_lz_get_near_optimal_match(struct lzx_compressor *ctx)
+{
+ return lz_get_near_optimal_match(&ctx->mc,
+ lzx_lz_get_matches_caching,
+ lzx_lz_skip_bytes,
+ lzx_get_prev_literal_cost,
+ lzx_get_match_cost,
+ ctx,
+ &ctx->queue);
+}
+
+/* Set default symbol costs for the LZX Huffman codes, used before any
+ * statistics are available: 8 bits per literal, 10 bits per match header,
+ * 8 bits per length symbol, and 3 bits per aligned offset symbol. */
+static void
+lzx_set_default_costs(struct lzx_costs * costs, unsigned num_main_syms)
+{
+	unsigned sym;
+
+	/* Main code: literal symbols, then match header symbols. */
+	for (sym = 0; sym < num_main_syms; sym++)
+		costs->main[sym] = (sym < LZX_NUM_CHARS) ? 8 : 10;
+
+	/* Length code */
+	for (sym = 0; sym < LZX_LENCODE_NUM_SYMBOLS; sym++)
+		costs->len[sym] = 8;
+
+	/* Aligned offset code */
+	for (sym = 0; sym < LZX_ALIGNEDCODE_NUM_SYMBOLS; sym++)
+		costs->aligned[sym] = 3;
+}
+
+/* Given the frequencies of symbols in an LZX-compressed block and the
+ * corresponding Huffman codes, return LZX_BLOCKTYPE_ALIGNED or
+ * LZX_BLOCKTYPE_VERBATIM if an aligned offset or verbatim block,
+ * respectively, will take fewer bits to output. */
+static int
+lzx_choose_verbatim_or_aligned(const struct lzx_freqs * freqs,
+			       const struct lzx_codes * codes)
+{
+	unsigned verbatim_cost = 0;
+	unsigned aligned_cost;
+	unsigned sym;
+
+	/* An aligned offset block pays a fixed 24 bits up front to output the
+	 * lengths needed to reconstruct the aligned offset code itself. */
+	aligned_cost = LZX_ALIGNEDCODE_ELEMENT_SIZE * LZX_ALIGNEDCODE_NUM_SYMBOLS;
+
+	/* A verbatim block spends a constant 3 bits per position footer,
+	 * whereas an aligned offset block spends one aligned offset codeword
+	 * per position footer. */
+	for (sym = 0; sym < LZX_ALIGNEDCODE_NUM_SYMBOLS; sym++) {
+		verbatim_cost += 3 * freqs->aligned[sym];
+		aligned_cost += codes->lens.aligned[sym] * freqs->aligned[sym];
+	}
+
+	return (aligned_cost < verbatim_cost) ?
+		LZX_BLOCKTYPE_ALIGNED : LZX_BLOCKTYPE_VERBATIM;
+}
+
+/* Find a near-optimal sequence of matches/literals with which to output the
+ * specified LZX block, then set the block's type to that which has the minimum
+ * cost to output (either verbatim or aligned). */
+static void
+lzx_optimize_block(struct lzx_compressor *ctx, struct lzx_block_spec *spec,
+ unsigned num_passes)
+{
+ const struct lzx_lru_queue orig_queue = ctx->queue;
+ struct lzx_freqs freqs;
+
+ unsigned orig_window_pos = spec->window_pos;
+ unsigned orig_cached_pos = ctx->cached_matches_pos;
+
+ LZX_ASSERT(ctx->match_window_pos == spec->window_pos);
+
+ ctx->match_window_end = spec->window_pos + spec->block_size;
+ spec->chosen_matches_start_pos = spec->window_pos;
+
+ LZX_ASSERT(num_passes >= 1);
+
+ /* The first optimal parsing pass is done using the cost model already
+ * set in ctx->costs. Each later pass is done using a cost model
+ * computed from the previous pass. */
+ for (unsigned pass = 0; pass < num_passes; pass++) {
+
+ /* Rewind the match-finder position, cached-match cursor, and
+  * offset LRU queue to the start of the block, and clear the
+  * per-pass statistics. */
+ ctx->match_window_pos = orig_window_pos;
+ ctx->cached_matches_pos = orig_cached_pos;
+ ctx->queue = orig_queue;
+ spec->num_chosen_matches = 0;
+ memset(&freqs, 0, sizeof(freqs));
+
+ for (unsigned i = spec->window_pos; i < spec->window_pos + spec->block_size; ) {
+ struct raw_match raw_match;
+ struct lzx_match lzx_match;
+
+ raw_match = lzx_lz_get_near_optimal_match(ctx);
+ if (raw_match.len >= LZX_MIN_MATCH_LEN) {
+ if (unlikely(raw_match.len == LZX_MIN_MATCH_LEN &&
+ raw_match.offset == ctx->max_window_size -
+ LZX_MIN_MATCH_LEN))
+ {
+ /* Degenerate case where the parser
+ * generated the minimum match length
+ * with the maximum offset. There
+ * aren't actually enough position slots
+ * to represent this offset, as noted in
+ * the comments in
+ * lzx_get_num_main_syms(), so we cannot
+ * allow it. Use literals instead.
+ *
+ * Note that this case only occurs if
+ * the match-finder can generate matches
+ * to the very start of the window. The
+ * suffix array match-finder can,
+ * although typical hash chain and
+ * binary tree match-finders use 0 as a
+ * null value and therefore cannot
+ * generate such matches. */
+ BUILD_BUG_ON(LZX_MIN_MATCH_LEN != 2);
+ /* Store the first literal here; the second
+  * one is stored by the common code at the
+  * bottom of the loop. */
+ lzx_match.data = lzx_tally_literal(ctx->window[i],
+ &freqs);
+ i += 1;
+ ctx->chosen_matches[spec->chosen_matches_start_pos +
+ spec->num_chosen_matches++]
+ = lzx_match;
+ lzx_match.data = lzx_tally_literal(ctx->window[i],
+ &freqs);
+ i += 1;
+ } else {
+ lzx_match.data = lzx_tally_match(raw_match.len,
+ raw_match.offset,
+ &freqs,
+ &ctx->queue);
+ i += raw_match.len;
+ }
+ } else {
+ lzx_match.data = lzx_tally_literal(ctx->window[i], &freqs);
+ i += 1;
+ }
+ ctx->chosen_matches[spec->chosen_matches_start_pos +
+ spec->num_chosen_matches++] = lzx_match;
+ }
+
+ lzx_make_huffman_codes(&freqs, &spec->codes,
+ ctx->num_main_syms);
+ if (pass < num_passes - 1)
+ lzx_set_costs(ctx, &spec->codes.lens);
+ /* Matches found on the first pass were cached; later passes
+  * replay them from the cache. */
+ ctx->matches_cached = true;
+ }
+ /* Use the final pass's frequencies to pick the cheaper block type. */
+ spec->block_type = lzx_choose_verbatim_or_aligned(&freqs, &spec->codes);
+ ctx->matches_cached = false;
+}
+
+/* Initialize the offset LRU queue and match chooser, then compute a
+ * near-optimal match/literal sequence for every block. */
+static void
+lzx_optimize_blocks(struct lzx_compressor *ctx)
+{
+	const unsigned num_passes = ctx->params.alg_params.slow.num_optim_passes;
+	unsigned i;
+
+	lzx_lru_queue_init(&ctx->queue);
+	lz_match_chooser_begin(&ctx->mc);
+
+	for (i = 0; i < ctx->num_blocks; i++)
+		lzx_optimize_block(ctx, &ctx->block_specs[i], num_passes);
+}
+
+/* Prepare the input window into one or more LZX blocks ready to be output. */
+static void
+lzx_prepare_blocks(struct lzx_compressor * ctx)
+{
+	unsigned i;
+	unsigned pos;
+
+	/* Initialize the match-finder. */
+	lz_sarray_load_window(&ctx->lz_sarray, ctx->window, ctx->window_size);
+	ctx->cached_matches_pos = 0;
+	ctx->matches_cached = false;
+	ctx->match_window_pos = 0;
+
+	/* Set up a default cost model. */
+	lzx_set_default_costs(&ctx->costs, ctx->num_main_syms);
+
+	/* Split the window into fixed-size blocks.
+	 *
+	 * TODO: The compression ratio could be slightly improved by performing
+	 * data-dependent block splitting instead of using fixed-size blocks.
+	 * Doing so well is a computationally hard problem, however. */
+	ctx->num_blocks = DIV_ROUND_UP(ctx->window_size, LZX_DIV_BLOCK_SIZE);
+	for (i = 0, pos = 0; i < ctx->num_blocks; i++, pos += LZX_DIV_BLOCK_SIZE) {
+		ctx->block_specs[i].window_pos = pos;
+		ctx->block_specs[i].block_size = min(ctx->window_size - pos, LZX_DIV_BLOCK_SIZE);
+	}
+
+	/* Determine sequence of matches/literals to output for each block. */
+	lzx_optimize_blocks(ctx);
+}
+
+/*
+ * This is the fast version of lzx_prepare_blocks(). This version "quickly"
+ * prepares a single compressed block containing the entire input. See the
+ * description of the "Fast algorithm" at the beginning of this file for more
+ * information.
+ *
+ * Input --- the preprocessed data:
+ *
+ * ctx->window[]
+ * ctx->window_size
+ *
+ * Output --- the block specification and the corresponding match/literal data:
+ *
+ * ctx->block_specs[]
+ * ctx->num_blocks
+ * ctx->chosen_matches[]
+ */
+static void
+lzx_prepare_block_fast(struct lzx_compressor * ctx)
+{
+ struct lzx_record_ctx record_ctx;
+ struct lzx_block_spec *spec;
+
+ /* Parameters to hash chain LZ match finder
+ * (lazy with 1 match lookahead) */
+ static const struct lz_params lzx_lz_params = {
+ /* Although LZX_MIN_MATCH_LEN == 2, length 2 matches typically
+ * aren't worth choosing when using greedy or lazy parsing. */
+ .min_match = 3,
+ .max_match = LZX_MAX_MATCH_LEN,
+ .max_offset = LZX_MAX_WINDOW_SIZE,
+ .good_match = LZX_MAX_MATCH_LEN,
+ .nice_match = LZX_MAX_MATCH_LEN,
+ .max_chain_len = LZX_MAX_MATCH_LEN,
+ .max_lazy_match = LZX_MAX_MATCH_LEN,
+ .too_far = 4096,
+ };
+
+ /* Initialize symbol frequencies and match offset LRU queue. */
+ memset(&record_ctx.freqs, 0, sizeof(struct lzx_freqs));
+ lzx_lru_queue_init(&record_ctx.queue);
+ record_ctx.matches = ctx->chosen_matches;
+
+ /* Determine series of matches/literals to output. */
+ lz_analyze_block(ctx->window,
+ ctx->window_size,
+ lzx_record_match,
+ lzx_record_literal,
+ &record_ctx,
+ &lzx_lz_params,
+ ctx->prev_tab);
+
+ /* Set up block specification. */
+ spec = &ctx->block_specs[0];
+ spec->block_type = LZX_BLOCKTYPE_ALIGNED;
+ spec->window_pos = 0;
+ spec->block_size = ctx->window_size;
+ spec->num_chosen_matches = (record_ctx.matches - ctx->chosen_matches);
+ spec->chosen_matches_start_pos = 0;
+ lzx_make_huffman_codes(&record_ctx.freqs, &spec->codes,
+ ctx->num_main_syms);
+ ctx->num_blocks = 1;