lzx_flush_output(struct lzx_output_bitstream *os)
{
	/* 'next' having reached 'end' means the output buffer was exhausted;
	 * return 0 so the caller can detect it.
	 * NOTE(review): presumed failure convention — confirm at call sites. */
	if (os->next == os->end)
		return 0;

	/* Flush any partially filled bit buffer: left-align the remaining
	 * bits in a 16-bit element and write it out little-endian (the
	 * stream is built from 16-bit coding units). */
	if (os->bitcount != 0)
		put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), os->next++);

	/* Number of bytes of output produced, from the start of the buffer
	 * to the current write position. */
	return (const u8 *)os->next - (const u8 *)os->start;
}
+
+/* Build the main, length, and aligned offset Huffman codes used in LZX.
+ *
+ * This takes as input the frequency tables for each code and produces as output
+ * a set of tables that map symbols to codewords and codeword lengths. */
+static void
+lzx_make_huffman_codes(const struct lzx_freqs *freqs, struct lzx_codes *codes,
+ unsigned num_main_syms)
+{
+ make_canonical_huffman_code(num_main_syms,
+ LZX_MAX_MAIN_CODEWORD_LEN,
+ freqs->main,
+ codes->lens.main,
+ codes->codewords.main);
+
+ make_canonical_huffman_code(LZX_LENCODE_NUM_SYMBOLS,
+ LZX_MAX_LEN_CODEWORD_LEN,
+ freqs->len,
+ codes->lens.len,
+ codes->codewords.len);
+
+ make_canonical_huffman_code(LZX_ALIGNEDCODE_NUM_SYMBOLS,
+ LZX_MAX_ALIGNED_CODEWORD_LEN,
+ freqs->aligned,
+ codes->lens.aligned,
+ codes->codewords.aligned);
+}
+
/*
 * Compute the "items" with which the codeword lengths @lens will be output,
 * as run-length-encoded deltas against @prev_lens, and tally how often each
 * precode symbol is used.
 *
 * Each item packs a precode symbol in its low 5 bits; any extra bits for
 * that symbol occupy bit 5 and up.  Symbols 0-16 represent a single length
 * as a delta from the previous block's length at the same position — since
 * delta = (prev - len) mod 17, a delta of 0 means "unchanged".  Symbol 17
 * is a run of 4-19 zero lengths, symbol 18 a run of 20-51 zero lengths,
 * and symbol 19 a run of 4-5 equal nonzero lengths whose delta-encoded
 * value follows in bits 6 and up.
 *
 * @precode_freqs must be zero-initialized by the caller.  @num_lens is
 * assumed nonzero (the do-while reads lens[0] unconditionally).
 *
 * Returns the number of items written to @precode_items.
 */
static unsigned
lzx_compute_precode_items(const u8 lens[restrict],
			  const u8 prev_lens[restrict],
			  const unsigned num_lens,
			  u32 precode_freqs[restrict],
			  unsigned precode_items[restrict])
{
	unsigned *itemptr;	/* next free slot in precode_items */
	unsigned run_start;	/* index of the first length in the current run */
	unsigned run_end;	/* index one past the last length in the run */
	unsigned extra_bits;
	int delta;
	u8 len;

	itemptr = precode_items;
	run_start = 0;
	do {
		/* Find the next run of codeword lengths. */

		/* len = the length being repeated */
		len = lens[run_start];

		run_end = run_start + 1;

		/* Fast case for a single length. */
		if (likely(run_end == num_lens || len != lens[run_end])) {
			/* Emit one delta-encoded length; the mod-17
			 * wraparound keeps delta in [0, 16]. */
			delta = prev_lens[run_start] - len;
			if (delta < 0)
				delta += 17;
			precode_freqs[delta]++;
			*itemptr++ = delta;
			run_start++;
			continue;
		}

		/* Extend the run. */
		do {
			run_end++;
		} while (run_end != num_lens && len == lens[run_end]);

		if (len == 0) {
			/* Run of zeroes. */

			/* Symbol 18: RLE 20 to 51 zeroes at a time. */
			while ((run_end - run_start) >= 20) {
				/* 5 extra bits encode (count - 20), capped at 31. */
				extra_bits = min((run_end - run_start) - 20, 0x1f);
				precode_freqs[18]++;
				*itemptr++ = 18 | (extra_bits << 5);
				run_start += 20 + extra_bits;
			}

			/* Symbol 17: RLE 4 to 19 zeroes at a time. */
			if ((run_end - run_start) >= 4) {
				/* 4 extra bits encode (count - 4), capped at 15. */
				extra_bits = min((run_end - run_start) - 4, 0xf);
				precode_freqs[17]++;
				*itemptr++ = 17 | (extra_bits << 5);
				run_start += 4 + extra_bits;
			}
		} else {

			/* A run of nonzero lengths. */

			/* Symbol 19: RLE 4 to 5 of any length at a time. */
			while ((run_end - run_start) >= 4) {
				/* 1 extra bit: 0 => run of 4, 1 => run of 5. */
				extra_bits = (run_end - run_start) > 4;
				delta = prev_lens[run_start] - len;
				if (delta < 0)
					delta += 17;
				precode_freqs[19]++;
				precode_freqs[delta]++;
				/* The repeated length itself is delta-encoded
				 * in bits 6 and up. */
				*itemptr++ = 19 | (extra_bits << 5) | (delta << 6);
				run_start += 4 + extra_bits;
			}
		}

		/* Output any remaining lengths without RLE (fewer than 4
		 * left over, or a leftover tail of a zero run). */
		while (run_start != run_end) {
			delta = prev_lens[run_start] - len;
			if (delta < 0)
				delta += 17;
			precode_freqs[delta]++;
			*itemptr++ = delta;
			run_start++;
		}
	} while (run_start != num_lens);

	return itemptr - precode_items;
}
+
+/*
+ * Output a Huffman code in the compressed form used in LZX.
+ *
+ * The Huffman code is represented in the output as a logical series of codeword
+ * lengths from which the Huffman code, which must be in canonical form, can be
+ * reconstructed.
+ *
+ * The codeword lengths are themselves compressed using a separate Huffman code,
+ * the "precode", which contains a symbol for each possible codeword length in
+ * the larger code as well as several special symbols to represent repeated
+ * codeword lengths (a form of run-length encoding). The precode is itself
+ * constructed in canonical form, and its codeword lengths are represented
+ * literally in 20 4-bit fields that immediately precede the compressed codeword
+ * lengths of the larger code.
+ *
+ * Furthermore, the codeword lengths of the larger code are actually represented
+ * as deltas from the codeword lengths of the corresponding code in the previous
+ * block.
+ *
+ * @os:
+ * Bitstream to which to write the compressed Huffman code.
+ * @lens:
+ * The codeword lengths, indexed by symbol, in the Huffman code.
+ * @prev_lens:
+ * The codeword lengths, indexed by symbol, in the corresponding Huffman
+ * code in the previous block, or all zeroes if this is the first block.
+ * @num_lens:
+ * The number of symbols in the Huffman code.
+ */
+static void
+lzx_write_compressed_code(struct lzx_output_bitstream *os,
+ const u8 lens[restrict],
+ const u8 prev_lens[restrict],
+ unsigned num_lens)
+{
+ u32 precode_freqs[LZX_PRECODE_NUM_SYMBOLS];
+ u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
+ u32 precode_codewords[LZX_PRECODE_NUM_SYMBOLS];
+ unsigned precode_items[num_lens];
+ unsigned num_precode_items;
+ unsigned precode_item;
+ unsigned precode_sym;
+ unsigned i;
+
+ for (i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++)
+ precode_freqs[i] = 0;
+
+ /* Compute the "items" (RLE / literal tokens and extra bits) with which
+ * the codeword lengths in the larger code will be output. */
+ num_precode_items = lzx_compute_precode_items(lens,
+ prev_lens,
+ num_lens,
+ precode_freqs,
+ precode_items);
+
+ /* Build the precode. */
+ make_canonical_huffman_code(LZX_PRECODE_NUM_SYMBOLS,
+ LZX_MAX_PRE_CODEWORD_LEN,
+ precode_freqs, precode_lens,
+ precode_codewords);
+
+ /* Output the lengths of the codewords in the precode. */
+ for (i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++)
+ lzx_write_bits(os, precode_lens[i], LZX_PRECODE_ELEMENT_SIZE);
+
+ /* Output the encoded lengths of the codewords in the larger code. */
+ for (i = 0; i < num_precode_items; i++) {
+ precode_item = precode_items[i];
+ precode_sym = precode_item & 0x1F;
+ lzx_write_varbits(os, precode_codewords[precode_sym],
+ precode_lens[precode_sym],
+ LZX_MAX_PRE_CODEWORD_LEN);
+ if (precode_sym >= 17) {
+ if (precode_sym == 17) {
+ lzx_write_bits(os, precode_item >> 5, 4);
+ } else if (precode_sym == 18) {
+ lzx_write_bits(os, precode_item >> 5, 5);
+ } else {
+ lzx_write_bits(os, (precode_item >> 5) & 1, 1);
+ precode_sym = precode_item >> 6;
+ lzx_write_varbits(os, precode_codewords[precode_sym],
+ precode_lens[precode_sym],
+ LZX_MAX_PRE_CODEWORD_LEN);
+ }
+ }
+ }
+}
+
/* Output a match or literal. */
static inline void
lzx_write_item(struct lzx_output_bitstream *os, struct lzx_item item,
	       unsigned ones_if_aligned, const struct lzx_codes *codes)
{
	/* Bit layout of item.data, as read below:
	 *   bits  0-9:  main code symbol
	 *   bits 10-17: length code symbol
	 *   bits 18-22: number of extra offset bits
	 *   bits 23+:   the extra offset bits themselves */
	u64 data = item.data;
	unsigned main_symbol;
	unsigned len_symbol;
	unsigned num_extra_bits;
	u32 extra_bits;

	main_symbol = data & 0x3FF;

	/* Every item starts with a main-code codeword. */
	lzx_write_varbits(os, codes->codewords.main[main_symbol],
			  codes->lens.main[main_symbol],
			  LZX_MAX_MAIN_CODEWORD_LEN);

	if (main_symbol < LZX_NUM_CHARS) /* Literal? */
		return;

	len_symbol = (data >> 10) & 0xFF;

	/* LZX_LENCODE_NUM_SYMBOLS acts as a sentinel meaning "no length
	 * codeword needed" — presumably the length fits in the main symbol;
	 * confirm against the code that builds lzx_item. */
	if (len_symbol != LZX_LENCODE_NUM_SYMBOLS) {
		lzx_write_varbits(os, codes->codewords.len[len_symbol],
				  codes->lens.len[len_symbol],
				  LZX_MAX_LEN_CODEWORD_LEN);
	}

	num_extra_bits = (data >> 18) & 0x1F;
	if (num_extra_bits == 0) /* Small offset or repeat offset match? */
		return;

	extra_bits = data >> 23;

	/* Branch-free form of the commented-out test: 'ones_if_aligned' is
	 * presumably all-ones (at least in the low 5 bits) for aligned offset
	 * blocks and 0 otherwise — confirm at the caller. */
	/*if (block_type == LZX_BLOCKTYPE_ALIGNED && num_extra_bits >= 3) {*/
	if ((num_extra_bits & ones_if_aligned) >= 3) {

		/* Aligned offset blocks: The low 3 bits of the extra offset
		 * bits are Huffman-encoded using the aligned offset code. The
		 * remaining bits are output literally. */

		lzx_write_varbits(os, extra_bits >> 3, num_extra_bits - 3, 14);

		lzx_write_varbits(os, codes->codewords.aligned[extra_bits & 7],
				  codes->lens.aligned[extra_bits & 7],
				  LZX_MAX_ALIGNED_CODEWORD_LEN);