+/*
+ * Output the precomputed matches and literal bytes of one LZX compressed
+ * block to the bitstream in their final compressed form.
+ *
+ * @ostream
+ *	The output bitstream.
+ * @block_type
+ *	The chosen type of the LZX compressed block (LZX_BLOCKTYPE_ALIGNED or
+ *	LZX_BLOCKTYPE_VERBATIM).
+ * @match_tab
+ *	The array of matches/literals to output.
+ * @match_count
+ *	Number of matches/literals to output (length of @match_tab).
+ * @codes
+ *	The main, length, and aligned offset Huffman codes for the current
+ *	LZX compressed block.
+ */
+static void
+lzx_write_matches_and_literals(struct output_bitstream *ostream,
+			       int block_type,
+			       const struct lzx_match match_tab[],
+			       unsigned match_count,
+			       const struct lzx_codes *codes)
+{
+	const struct lzx_match *match_ptr = match_tab;
+	const struct lzx_match *match_end = match_tab + match_count;
+
+	while (match_ptr != match_end) {
+		struct lzx_match item = *match_ptr++;
+
+		/* The high bit of the 32-bit intermediate representation is
+		 * set for an actual LZ-style match and clear for a literal
+		 * byte. */
+		if (item.data & 0x80000000)
+			lzx_write_match(ostream, block_type, item, codes);
+		else
+			lzx_write_literal(ostream, item.data, codes);
+	}
+}
+
+/*
+ * Debug-only sanity check on a set of Huffman codes: verify that every
+ * codeword length is within the limit the LZX format imposes on its
+ * alphabet, then verify that the main, length, and aligned offset lengths
+ * each form a usable code by building a Huffman decode table from them
+ * (make_huffman_decode_table() is asserted to return 0, i.e. success).
+ * Compiles to a no-op unless ENABLE_LZX_DEBUG is defined.
+ */
+static void
+lzx_assert_codes_valid(const struct lzx_codes * codes, unsigned num_main_syms)
+{
+#ifdef ENABLE_LZX_DEBUG
+	unsigned i;
+
+	/* Per-alphabet codeword length limits. */
+	for (i = 0; i < num_main_syms; i++)
+		LZX_ASSERT(codes->lens.main[i] <= LZX_MAX_MAIN_CODEWORD_LEN);
+
+	for (i = 0; i < LZX_LENCODE_NUM_SYMBOLS; i++)
+		LZX_ASSERT(codes->lens.len[i] <= LZX_MAX_LEN_CODEWORD_LEN);
+
+	for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++)
+		LZX_ASSERT(codes->lens.aligned[i] <= LZX_MAX_ALIGNED_CODEWORD_LEN);
+
+	/* One scratch table (a VLA sized for the largest alphabet) is reused
+	 * for all three make_huffman_decode_table() calls below. */
+	const unsigned tablebits = 10;
+	u16 decode_table[(1 << tablebits) +
+			 (2 * max(num_main_syms, LZX_LENCODE_NUM_SYMBOLS))]
+			 _aligned_attribute(DECODE_TABLE_ALIGNMENT);
+	LZX_ASSERT(0 == make_huffman_decode_table(decode_table,
+						  num_main_syms,
+						  min(tablebits, LZX_MAINCODE_TABLEBITS),
+						  codes->lens.main,
+						  LZX_MAX_MAIN_CODEWORD_LEN));
+	LZX_ASSERT(0 == make_huffman_decode_table(decode_table,
+						  LZX_LENCODE_NUM_SYMBOLS,
+						  min(tablebits, LZX_LENCODE_TABLEBITS),
+						  codes->lens.len,
+						  LZX_MAX_LEN_CODEWORD_LEN));
+	LZX_ASSERT(0 == make_huffman_decode_table(decode_table,
+						  LZX_ALIGNEDCODE_NUM_SYMBOLS,
+						  min(tablebits, LZX_ALIGNEDCODE_TABLEBITS),
+						  codes->lens.aligned,
+						  LZX_MAX_ALIGNED_CODEWORD_LEN));
+#endif /* ENABLE_LZX_DEBUG */
+}
+
+/*
+ * Write an LZX aligned offset or verbatim block to the output.
+ *
+ * @block_type
+ *	LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM.
+ * @block_size
+ *	Number of uncompressed bytes represented by the block.
+ * @max_window_size
+ *	Maximum window size; controls whether a non-default block size is
+ *	written in 16 or 24 bits (see comment below).
+ * @num_main_syms
+ *	Number of symbols in the main code.
+ * @chosen_matches
+ *	Intermediate representations of the matches/literals to output.
+ * @num_chosen_matches
+ *	Length of @chosen_matches.
+ * @codes
+ *	Huffman codes for this block.
+ * @prev_codes
+ *	Huffman codes of the previous block (all-zero lengths for the first
+ *	block); needed because codeword lengths are transmitted as deltas
+ *	against the previous block's lengths.
+ * @ostream
+ *	The output bitstream.
+ */
+static void
+lzx_write_compressed_block(int block_type,
+			   unsigned block_size,
+			   unsigned max_window_size,
+			   unsigned num_main_syms,
+			   struct lzx_match * chosen_matches,
+			   unsigned num_chosen_matches,
+			   const struct lzx_codes * codes,
+			   const struct lzx_codes * prev_codes,
+			   struct output_bitstream * ostream)
+{
+	unsigned i;
+
+	LZX_ASSERT(block_type == LZX_BLOCKTYPE_ALIGNED ||
+		   block_type == LZX_BLOCKTYPE_VERBATIM);
+	lzx_assert_codes_valid(codes, num_main_syms);
+
+	/* The first three bits indicate the type of block and are one of the
+	 * LZX_BLOCKTYPE_* constants. */
+	bitstream_put_bits(ostream, block_type, 3);
+
+	/* Output the block size.
+	 *
+	 * The original LZX format seemed to always encode the block size in 3
+	 * bytes. However, the implementation in WIMGAPI, as used in WIM files,
+	 * uses the first bit to indicate whether the block is the default size
+	 * (32768) or a different size given explicitly by the next 16 bits.
+	 *
+	 * By default, this compressor uses a window size of 32768 and therefore
+	 * follows the WIMGAPI behavior. However, this compressor also supports
+	 * window sizes greater than 32768 bytes, which do not appear to be
+	 * supported by WIMGAPI. In such cases, we retain the default size bit
+	 * to mean a size of 32768 bytes but output non-default block size in 24
+	 * bits rather than 16. The compatibility of this behavior is unknown
+	 * because WIMs created with chunk size greater than 32768 can seemingly
+	 * only be opened by wimlib anyway. */
+	if (block_size == LZX_DEFAULT_BLOCK_SIZE) {
+		bitstream_put_bits(ostream, 1, 1);
+	} else {
+		bitstream_put_bits(ostream, 0, 1);
+
+		if (max_window_size >= 65536)
+			bitstream_put_bits(ostream, block_size >> 16, 8);
+
+		bitstream_put_bits(ostream, block_size, 16);
+	}
+
+	/* Write out lengths of the main code. Note that the LZX specification
+	 * incorrectly states that the aligned offset code comes after the
+	 * length code, but in fact it is the very first code to be written
+	 * (before the main code). */
+	if (block_type == LZX_BLOCKTYPE_ALIGNED)
+		for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++)
+			bitstream_put_bits(ostream, codes->lens.aligned[i],
+					   LZX_ALIGNEDCODE_ELEMENT_SIZE);
+
+	LZX_DEBUG("Writing main code...");
+
+	/* Write the precode and lengths for the first LZX_NUM_CHARS symbols in
+	 * the main code, which are the codewords for literal bytes. */
+	lzx_write_compressed_code(ostream,
+				  codes->lens.main,
+				  prev_codes->lens.main,
+				  LZX_NUM_CHARS);
+
+	/* Write the precode and lengths for the rest of the main code, which
+	 * are the codewords for match headers. */
+	lzx_write_compressed_code(ostream,
+				  codes->lens.main + LZX_NUM_CHARS,
+				  prev_codes->lens.main + LZX_NUM_CHARS,
+				  num_main_syms - LZX_NUM_CHARS);
+
+	LZX_DEBUG("Writing length code...");
+
+	/* Write the precode and lengths for the length code. */
+	lzx_write_compressed_code(ostream,
+				  codes->lens.len,
+				  prev_codes->lens.len,
+				  LZX_LENCODE_NUM_SYMBOLS);
+
+	LZX_DEBUG("Writing matches and literals...");
+
+	/* Write the actual matches and literals. */
+	lzx_write_matches_and_literals(ostream, block_type,
+				       chosen_matches, num_chosen_matches,
+				       codes);
+
+	LZX_DEBUG("Done writing block.");
+}
+
+/* Emit every computed LZX block to @ostream.  Each block's Huffman codes are
+ * threaded through as the "previous codes" for the following block, since
+ * codeword lengths are transmitted as deltas; the first block is delta'd
+ * against the all-zero lengths in ctx->zero_codes. */
+static void
+lzx_write_all_blocks(struct lzx_compressor *ctx, struct output_bitstream *ostream)
+{
+	const struct lzx_codes *last_codes = &ctx->zero_codes;
+	unsigned blk;
+
+	for (blk = 0; blk < ctx->num_blocks; blk++) {
+		const struct lzx_block_spec *spec = &ctx->block_specs[blk];
+
+		LZX_DEBUG("Writing block %u/%u (type=%d, size=%u, num_chosen_matches=%u)...",
+			  blk + 1, ctx->num_blocks,
+			  spec->block_type, spec->block_size,
+			  spec->num_chosen_matches);
+
+		lzx_write_compressed_block(spec->block_type,
+					   spec->block_size,
+					   ctx->max_window_size,
+					   ctx->num_main_syms,
+					   &ctx->chosen_matches[spec->chosen_matches_start_pos],
+					   spec->num_chosen_matches,
+					   &spec->codes,
+					   last_codes,
+					   ostream);
+
+		last_codes = &spec->codes;
+	}
+}
+
+/* Record a literal byte in the main-code symbol frequencies and return its
+ * 32-bit intermediate representation: the byte value itself, with the high
+ * bit clear to mark it as a literal rather than a match. */
+static u32
+lzx_tally_literal(u8 lit, struct lzx_freqs *freqs)
+{
+	++freqs->main[lit];
+	return (u32)lit;
+}
+
+/* Constructs an LZX match from an offset and a length, and updates the LRU
+ * queue and the frequency of symbols in the main, length, and aligned offset
+ * alphabets.  The return value is the match in a 32-bit intermediate
+ * representation laid out as follows (see the range asserts at the end):
+ *
+ *	bit  31		always set (distinguishes a match from a literal)
+ *	bits 25-30	position slot
+ *	bits 8-24	position footer
+ *	bits 0-7	match length minus LZX_MIN_MATCH_LEN
+ */
+static u32
+lzx_tally_match(unsigned match_len, unsigned match_offset,
+		struct lzx_freqs *freqs, struct lzx_lru_queue *queue)
+{
+	unsigned position_slot;
+	unsigned position_footer;
+	u32 len_header;
+	unsigned main_symbol;
+	unsigned len_footer;	/* Only set/used when a length footer is needed. */
+	unsigned adjusted_match_len;
+
+	LZX_ASSERT(match_len >= LZX_MIN_MATCH_LEN && match_len <= LZX_MAX_MATCH_LEN);
+
+	/* The match offset shall be encoded as a position slot (itself encoded
+	 * as part of the main symbol) and a position footer. */
+	position_slot = lzx_get_position_slot(match_offset, queue);
+	position_footer = (match_offset + LZX_OFFSET_OFFSET) -
+			  lzx_position_base[position_slot];
+
+	/* The match length shall be encoded as a length header (itself encoded
+	 * as part of the main symbol) and an optional length footer. */
+	adjusted_match_len = match_len - LZX_MIN_MATCH_LEN;
+	if (adjusted_match_len < LZX_NUM_PRIMARY_LENS) {
+		/* No length footer needed. */
+		len_header = adjusted_match_len;
+	} else {
+		/* Length footer needed. It will be encoded using the length
+		 * code. */
+		len_header = LZX_NUM_PRIMARY_LENS;
+		len_footer = adjusted_match_len - LZX_NUM_PRIMARY_LENS;
+		freqs->len[len_footer]++;
+	}
+
+	/* Account for the main symbol: position slot and length header packed
+	 * together, offset past the literal-byte symbols. */
+	main_symbol = ((position_slot << 3) | len_header) + LZX_NUM_CHARS;
+
+	freqs->main[main_symbol]++;
+
+	/* In an aligned offset block, 3 bits of the position footer are output
+	 * as an aligned offset symbol. Account for this, although we may
+	 * ultimately decide to output the block as verbatim. */
+
+	/* The following check is equivalent to:
+	 *
+	 * if (lzx_extra_bits[position_slot] >= 3)
+	 *
+	 * Note that this correctly excludes position slots that correspond to
+	 * recent offsets. */
+	if (position_slot >= 8)
+		freqs->aligned[position_footer & 7]++;
+
+	/* Pack the position slot, position footer, and match length into an
+	 * intermediate representation. See `struct lzx_match' for details.
+	 */
+	LZX_ASSERT(LZX_MAX_POSITION_SLOTS <= 64);
+	LZX_ASSERT(lzx_get_num_extra_bits(LZX_MAX_POSITION_SLOTS - 1) <= 17);
+	LZX_ASSERT(LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1 <= 256);
+
+	/* Each field must fit in its bitfield of the packed representation. */
+	LZX_ASSERT(position_slot <= (1U << (31 - 25)) - 1);
+	LZX_ASSERT(position_footer <= (1U << (25 - 8)) - 1);
+	LZX_ASSERT(adjusted_match_len <= (1U << (8 - 0)) - 1);
+	return 0x80000000 |
+	       (position_slot << 25) |
+	       (position_footer << 8) |
+	       (adjusted_match_len);
+}
+
+/* State carried through the match/literal recording callbacks. */
+struct lzx_record_ctx {
+	struct lzx_freqs freqs;		/* Symbol frequency counters, updated per item */
+	struct lzx_lru_queue queue;	/* Recent-offsets (LRU) queue, updated per match */
+	struct lzx_match *matches;	/* Next slot to fill in the recorded-items array */
+};
+
+/* Match-finder callback: tally a match's symbols and append its 32-bit
+ * intermediate representation to the recorded-items array. */
+static void
+lzx_record_match(unsigned len, unsigned offset, void *_ctx)
+{
+	struct lzx_record_ctx *rec = _ctx;
+	u32 data = lzx_tally_match(len, offset, &rec->freqs, &rec->queue);
+
+	rec->matches->data = data;
+	rec->matches++;
+}
+
+/* Match-finder callback: tally a literal byte and append its 32-bit
+ * intermediate representation to the recorded-items array. */
+static void
+lzx_record_literal(u8 lit, void *_ctx)
+{
+	struct lzx_record_ctx *rec = _ctx;
+	u32 data = lzx_tally_literal(lit, &rec->freqs);
+
+	rec->matches->data = data;
+	rec->matches++;
+}
+
+/* Return the cost, in bits, of outputting the literal byte @c under the
+ * specified cost model (simply the main-code cost of that symbol). */
+static unsigned
+lzx_literal_cost(u8 c, const struct lzx_costs * costs)
+{
+	unsigned bits = costs->main[c];
+
+	return bits;
+}
+
+/* Estimate, in bits, the cost of representing a match with the given @length
+ * and @offset in the compressed output, given the costs of the codewords in
+ * the main, length, and aligned Huffman codes.  The match offset LRU @queue
+ * is taken into account and optionally updated. */
+static unsigned
+lzx_match_cost(unsigned length, unsigned offset, const struct lzx_costs *costs,
+	       struct lzx_lru_queue *queue)
+{
+	unsigned slot = lzx_get_position_slot(offset, queue);
+	unsigned adj_len = length - LZX_MIN_MATCH_LEN;
+	unsigned len_hdr = min(adj_len, LZX_NUM_PRIMARY_LENS);
+	unsigned sym = ((slot << 3) | len_hdr) + LZX_NUM_CHARS;
+	unsigned extra = lzx_get_num_extra_bits(slot);
+	unsigned bits;
+
+	/* Main symbol (encodes position slot and length header together). */
+	bits = costs->main[sym];
+
+	/* Extra offset bits; when there are at least 3, the low 3 are sent as
+	 * an aligned offset symbol rather than as verbatim bits. */
+	if (extra >= 3)
+		bits += (extra - 3) + costs->aligned[(offset + LZX_OFFSET_OFFSET) & 7];
+	else
+		bits += extra;
+
+	/* Length footer, when the length did not fit in the length header. */
+	if (len_hdr == LZX_NUM_PRIMARY_LENS)
+		bits += costs->len[adj_len - LZX_NUM_PRIMARY_LENS];
+
+	return bits;
+}
+
+/* Fast heuristic cost evaluation for the match-finder's inner loop.  Unlike
+ * lzx_match_cost(), which does a true cost evaluation, this merely ranks
+ * matches by offset: each of the LZX_NUM_RECENT_OFFSETS repeat offsets maps
+ * to its (tiny) queue index, and every other offset maps to itself, so a
+ * smaller return value means a more preferable offset. */
+static input_idx_t
+lzx_match_cost_fast(input_idx_t length, input_idx_t offset, const void *_queue)
+{
+	const struct lzx_lru_queue *q = _queue;
+	input_idx_t slot = 0;
+
+	/* Giving priority to recently used offsets seems well worth the
+	 * time spent checking for them. */
+	while (slot < LZX_NUM_RECENT_OFFSETS) {
+		if (q->R[slot] == offset)
+			return slot;
+		slot++;
+	}
+
+	return offset;
+}
+
+/* Load the cost model @ctx->costs from the Huffman codeword lengths given in
+ * @lens.
+ *
+ * Costs and codeword lengths are nearly the same thing, except that symbols
+ * whose codeword length is 0 (zero frequency) must still receive some actual
+ * cost.  The substitute values are arbitrary, but they should be fairly high
+ * (near the maximum codeword length) because uses of such symbols are
+ * expected to be rare. */
+static void
+lzx_set_costs(struct lzx_compressor * ctx, const struct lzx_lens * lens)
+{
+	const unsigned num_main_syms = ctx->num_main_syms;
+	unsigned sym;
+
+	/* Main code */
+	for (sym = 0; sym < num_main_syms; sym++)
+		ctx->costs.main[sym] = lens->main[sym] != 0 ?
+			lens->main[sym] :
+			ctx->params.alg_params.slow.main_nostat_cost;
+
+	/* Length code */
+	for (sym = 0; sym < LZX_LENCODE_NUM_SYMBOLS; sym++)
+		ctx->costs.len[sym] = lens->len[sym] != 0 ?
+			lens->len[sym] :
+			ctx->params.alg_params.slow.len_nostat_cost;
+
+	/* Aligned offset code */
+	for (sym = 0; sym < LZX_ALIGNEDCODE_NUM_SYMBOLS; sym++)
+		ctx->costs.aligned[sym] = lens->aligned[sym] != 0 ?
+			lens->aligned[sym] :
+			ctx->params.alg_params.slow.aligned_nostat_cost;
+}
+
+/* Tell the match-finder to skip the specified number of bytes (@n) in the
+ * input. */
+static void
+lzx_lz_skip_bytes(struct lzx_compressor *ctx, input_idx_t n)
+{
+	LZX_ASSERT(n <= ctx->match_window_end - ctx->match_window_pos);
+	if (ctx->matches_cached) {
+		/* Matches for these positions were already found and cached;
+		 * just advance past each position's cache entry, which holds
+		 * its match count (.len) followed by that many matches. */
+		ctx->match_window_pos += n;
+		while (n--) {
+			ctx->cached_matches_pos +=
+				ctx->cached_matches[ctx->cached_matches_pos].len + 1;
+		}
+	} else {
+		/* First pass: record an empty (zero-length) cache entry for
+		 * each skipped position and advance the suffix-array
+		 * match-finder past it. */
+		while (n--) {
+			ctx->cached_matches[ctx->cached_matches_pos++].len = 0;
+			lz_sarray_skip_position(&ctx->lz_sarray);
+			ctx->match_window_pos++;
+		}
+		LZX_ASSERT(lz_sarray_get_pos(&ctx->lz_sarray) == ctx->match_window_pos);
+	}
+}
+
+/* Retrieve a list of matches available at the next position in the input.
+ *
+ * A pointer to the matches array is written into @matches_ret, and the return
+ * value is the number of matches found. */
+static u32
+lzx_lz_get_matches_caching(struct lzx_compressor *ctx,
+ const struct lzx_lru_queue *queue,
+ struct raw_match **matches_ret)