#include "wimlib/endianness.h"
#include "wimlib/error.h"
#include "wimlib/lz_mf.h"
+#include "wimlib/lz_repsearch.h"
#include "wimlib/lzx.h"
#include "wimlib/util.h"
#include <string.h>
*
* @os:
* The bitstream to which to write the match.
- * @block_type:
- * The type of the LZX block (LZX_BLOCKTYPE_ALIGNED or
- * LZX_BLOCKTYPE_VERBATIM)
+ * @ones_if_aligned
+ * A mask of all ones if the block is of type LZX_BLOCKTYPE_ALIGNED,
+ * otherwise 0.
* @match:
* The match data.
* @codes:
* and aligned offset Huffman codes for the current LZX compressed block.
*/
static void
-lzx_write_match(struct lzx_output_bitstream *os, int block_type,
+lzx_write_match(struct lzx_output_bitstream *os, unsigned ones_if_aligned,
struct lzx_item match, const struct lzx_codes *codes)
{
unsigned match_len_minus_2 = match.data & 0xff;
num_extra_bits = lzx_get_num_extra_bits(position_slot);
- if ((block_type == LZX_BLOCKTYPE_ALIGNED) && (num_extra_bits >= 3)) {
+ if ((num_extra_bits & ones_if_aligned) >= 3) {
/* Aligned offset blocks: The low 3 bits of the position footer
* are Huffman-encoded using the aligned offset code. The
}
static unsigned
-lzx_build_precode(const u8 lens[restrict],
- const u8 prev_lens[restrict],
- const unsigned num_syms,
- u32 precode_freqs[restrict LZX_PRECODE_NUM_SYMBOLS],
- u8 output_syms[restrict num_syms],
- u8 precode_lens[restrict LZX_PRECODE_NUM_SYMBOLS],
- u32 precode_codewords[restrict LZX_PRECODE_NUM_SYMBOLS],
- unsigned *num_additional_bits_ret)
+lzx_compute_precode_items(const u8 lens[restrict],
+ const u8 prev_lens[restrict],
+ const unsigned num_lens,
+ u32 precode_freqs[restrict],
+ unsigned precode_items[restrict])
{
- memset(precode_freqs, 0,
- LZX_PRECODE_NUM_SYMBOLS * sizeof(precode_freqs[0]));
-
- /* Since the code word lengths use a form of RLE encoding, the goal here
- * is to find each run of identical lengths when going through them in
- * symbol order (including runs of length 1). For each run, as many
- * lengths are encoded using RLE as possible, and the rest are output
- * literally.
- *
- * output_syms[] will be filled in with the length symbols that will be
- * output, including RLE codes, not yet encoded using the precode.
- *
- * cur_run_len keeps track of how many code word lengths are in the
- * current run of identical lengths. */
- unsigned output_syms_idx = 0;
- unsigned cur_run_len = 1;
- unsigned num_additional_bits = 0;
- for (unsigned i = 1; i <= num_syms; i++) {
-
- if (i != num_syms && lens[i] == lens[i - 1]) {
- /* Still in a run--- keep going. */
- cur_run_len++;
- continue;
- }
+ unsigned *itemptr;
+ unsigned run_start;
+ unsigned run_end;
+ unsigned extra_bits;
+ int delta;
+ u8 len;
+
+ itemptr = precode_items;
+ run_start = 0;
+ do {
+ /* Find the next run of codeword lengths. */
+
+ /* len = the length being repeated */
+ len = lens[run_start];
- /* Run ended! Check if it is a run of zeroes or a run of
- * nonzeroes. */
+ run_end = run_start + 1;
- /* The symbol that was repeated in the run--- not to be confused
- * with the length *of* the run (cur_run_len) */
- unsigned len_in_run = lens[i - 1];
+ /* Fast case for a single length. */
+ if (likely(run_end == num_lens || len != lens[run_end])) {
+ delta = prev_lens[run_start] - len;
+ if (delta < 0)
+ delta += 17;
+ precode_freqs[delta]++;
+ *itemptr++ = delta;
+ run_start++;
+ continue;
+ }
- if (len_in_run == 0) {
- /* A run of 0's. Encode it in as few length
- * codes as we can. */
+ /* Extend the run. */
+ do {
+ run_end++;
+ } while (run_end != num_lens && len == lens[run_end]);
- /* The magic length 18 indicates a run of 20 + n zeroes,
- * where n is an uncompressed literal 5-bit integer that
- * follows the magic length. */
- while (cur_run_len >= 20) {
- unsigned additional_bits;
+ if (len == 0) {
+ /* Run of zeroes. */
- additional_bits = min(cur_run_len - 20, 0x1f);
- num_additional_bits += 5;
+ /* Symbol 18: RLE 20 to 51 zeroes at a time. */
+ while ((run_end - run_start) >= 20) {
+ extra_bits = min((run_end - run_start) - 20, 0x1f);
precode_freqs[18]++;
- output_syms[output_syms_idx++] = 18;
- output_syms[output_syms_idx++] = additional_bits;
- cur_run_len -= 20 + additional_bits;
+ *itemptr++ = 18 | (extra_bits << 5);
+ run_start += 20 + extra_bits;
}
- /* The magic length 17 indicates a run of 4 + n zeroes,
- * where n is an uncompressed literal 4-bit integer that
- * follows the magic length. */
- while (cur_run_len >= 4) {
- unsigned additional_bits;
-
- additional_bits = min(cur_run_len - 4, 0xf);
- num_additional_bits += 4;
+ /* Symbol 17: RLE 4 to 19 zeroes at a time. */
+ if ((run_end - run_start) >= 4) {
+ extra_bits = min((run_end - run_start) - 4, 0xf);
precode_freqs[17]++;
- output_syms[output_syms_idx++] = 17;
- output_syms[output_syms_idx++] = additional_bits;
- cur_run_len -= 4 + additional_bits;
+ *itemptr++ = 17 | (extra_bits << 5);
+ run_start += 4 + extra_bits;
}
-
} else {
/* A run of nonzero lengths. */
- /* The magic length 19 indicates a run of 4 + n
- * nonzeroes, where n is a literal bit that follows the
- * magic length, and where the value of the lengths in
- * the run is given by an extra length symbol, encoded
- * with the precode, that follows the literal bit.
- *
- * The extra length symbol is encoded as a difference
- * from the length of the codeword for the first symbol
- * in the run in the previous code.
- * */
- while (cur_run_len >= 4) {
- unsigned additional_bits;
- signed char delta;
-
- additional_bits = (cur_run_len > 4);
- num_additional_bits += 1;
- delta = (signed char)prev_lens[i - cur_run_len] -
- (signed char)len_in_run;
+ /* Symbol 19: RLE 4 to 5 of any length at a time. */
+ while ((run_end - run_start) >= 4) {
+ extra_bits = (run_end - run_start) > 4;
+ delta = prev_lens[run_start] - len;
if (delta < 0)
delta += 17;
precode_freqs[19]++;
- precode_freqs[(unsigned char)delta]++;
- output_syms[output_syms_idx++] = 19;
- output_syms[output_syms_idx++] = additional_bits;
- output_syms[output_syms_idx++] = delta;
- cur_run_len -= 4 + additional_bits;
+ precode_freqs[delta]++;
+ *itemptr++ = 19 | (extra_bits << 5) | (delta << 6);
+ run_start += 4 + extra_bits;
}
}
- /* Any remaining lengths in the run are outputted without RLE,
- * as a difference from the length of that codeword in the
- * previous code. */
- while (cur_run_len > 0) {
- signed char delta;
-
- delta = (signed char)prev_lens[i - cur_run_len] -
- (signed char)len_in_run;
+ /* Output any remaining lengths without RLE. */
+ while (run_start != run_end) {
+ delta = prev_lens[run_start] - len;
if (delta < 0)
delta += 17;
-
- precode_freqs[(unsigned char)delta]++;
- output_syms[output_syms_idx++] = delta;
- cur_run_len--;
+ precode_freqs[delta]++;
+ *itemptr++ = delta;
+ run_start++;
}
+ } while (run_start != num_lens);
- cur_run_len = 1;
- }
-
- /* Build the precode from the frequencies of the length symbols. */
-
- make_canonical_huffman_code(LZX_PRECODE_NUM_SYMBOLS,
- LZX_MAX_PRE_CODEWORD_LEN,
- precode_freqs, precode_lens,
- precode_codewords);
-
- *num_additional_bits_ret = num_additional_bits;
-
- return output_syms_idx;
+ return itemptr - precode_items;
}
/*
* @prev_lens:
* The codeword lengths, indexed by symbol, in the corresponding Huffman
* code in the previous block, or all zeroes if this is the first block.
- * @num_syms:
+ * @num_lens:
* The number of symbols in the Huffman code.
*/
static void
lzx_write_compressed_code(struct lzx_output_bitstream *os,
const u8 lens[restrict],
const u8 prev_lens[restrict],
- unsigned num_syms)
+ unsigned num_lens)
{
u32 precode_freqs[LZX_PRECODE_NUM_SYMBOLS];
- u8 output_syms[num_syms];
u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
u32 precode_codewords[LZX_PRECODE_NUM_SYMBOLS];
+ unsigned precode_items[num_lens];
+ unsigned num_precode_items;
+ unsigned precode_item;
+ unsigned precode_sym;
unsigned i;
- unsigned num_output_syms;
- u8 precode_sym;
- unsigned dummy;
-
- num_output_syms = lzx_build_precode(lens,
- prev_lens,
- num_syms,
- precode_freqs,
- output_syms,
- precode_lens,
- precode_codewords,
- &dummy);
-
- /* Write the lengths of the precode codes to the output. */
+
for (i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++)
- lzx_write_bits(os, precode_lens[i], LZX_PRECODE_ELEMENT_SIZE);
+ precode_freqs[i] = 0;
- /* Write the length symbols, encoded with the precode, to the output. */
+ /* Compute the "items" (RLE / literal tokens and extra bits) with which
+ * the codeword lengths in the larger code will be output. */
+ num_precode_items = lzx_compute_precode_items(lens,
+ prev_lens,
+ num_lens,
+ precode_freqs,
+ precode_items);
- for (i = 0; i < num_output_syms; ) {
- precode_sym = output_syms[i++];
+ /* Build the precode. */
+ make_canonical_huffman_code(LZX_PRECODE_NUM_SYMBOLS,
+ LZX_MAX_PRE_CODEWORD_LEN,
+ precode_freqs, precode_lens,
+ precode_codewords);
+ /* Output the lengths of the codewords in the precode. */
+ for (i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++)
+ lzx_write_bits(os, precode_lens[i], LZX_PRECODE_ELEMENT_SIZE);
+
+ /* Output the encoded lengths of the codewords in the larger code. */
+ for (i = 0; i < num_precode_items; i++) {
+ precode_item = precode_items[i];
+ precode_sym = precode_item & 0x1F;
lzx_write_varbits(os, precode_codewords[precode_sym],
precode_lens[precode_sym],
LZX_MAX_PRE_CODEWORD_LEN);
- switch (precode_sym) {
- case 17:
- lzx_write_bits(os, output_syms[i++], 4);
- break;
- case 18:
- lzx_write_bits(os, output_syms[i++], 5);
- break;
- case 19:
- lzx_write_bits(os, output_syms[i++], 1);
- lzx_write_varbits(os, precode_codewords[output_syms[i]],
- precode_lens[output_syms[i]],
- LZX_MAX_PRE_CODEWORD_LEN);
- i++;
- break;
- default:
- break;
+ if (precode_sym >= 17) {
+ if (precode_sym == 17) {
+ lzx_write_bits(os, precode_item >> 5, 4);
+ } else if (precode_sym == 18) {
+ lzx_write_bits(os, precode_item >> 5, 5);
+ } else {
+ lzx_write_bits(os, (precode_item >> 5) & 1, 1);
+ precode_sym = precode_item >> 6;
+ lzx_write_varbits(os, precode_codewords[precode_sym],
+ precode_lens[precode_sym],
+ LZX_MAX_PRE_CODEWORD_LEN);
+ }
}
}
}
const struct lzx_item items[], u32 num_items,
const struct lzx_codes *codes)
{
+ unsigned ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED);
+
for (u32 i = 0; i < num_items; i++) {
/* The high bit of the 32-bit intermediate representation
* indicates whether the item is an actual LZ-style match (1) or
* a literal byte (0). */
if (items[i].data & 0x80000000)
- lzx_write_match(os, block_type, items[i], codes);
+ lzx_write_match(os, ones_if_aligned, items[i], codes);
else
lzx_write_literal(os, items[i].data, codes);
}
}
/*
- * lzx_choose_near_optimal_match() -
+ * Find the longest repeat offset match.
+ *
+ * If no match of at least LZX_MIN_MATCH_LEN bytes is found, then return 0.
+ *
+ * If a match of at least LZX_MIN_MATCH_LEN bytes is found, then return its
+ * length and set *slot_ret to the index of its offset in @queue.
+ */
+static inline u32
+lzx_repsearch(const u8 * const strptr, const u32 bytes_remaining,
+ const struct lzx_lru_queue *queue, unsigned *slot_ret)
+{
+ BUILD_BUG_ON(LZX_MIN_MATCH_LEN != 2);
+ return lz_repsearch(strptr, bytes_remaining, LZX_MAX_MATCH_LEN,
+ queue->R, LZX_NUM_RECENT_OFFSETS, slot_ret);
+}
+
+/*
+ * lzx_choose_near_optimal_item() -
*
* Choose an approximately optimal match or literal to use at the next position
* in the string, or "window", being LZ-encoded.
/* Search for matches at repeat offsets. As a heuristic, we only keep
* the one with the longest match length. */
- longest_rep_len = LZX_MIN_MATCH_LEN - 1;
- if (c->match_window_pos >= 1) {
- unsigned limit = min(LZX_MAX_MATCH_LEN,
- c->match_window_end - c->match_window_pos);
- for (int i = 0; i < LZX_NUM_RECENT_OFFSETS; i++) {
- u32 offset = c->queue.R[i];
- const u8 *strptr = &c->cur_window[c->match_window_pos];
- const u8 *matchptr = strptr - offset;
- unsigned len = 0;
- while (len < limit && strptr[len] == matchptr[len])
- len++;
- if (len > longest_rep_len) {
- longest_rep_len = len;
- longest_rep_slot = i;
- }
- }
+ if (likely(c->match_window_pos >= 1)) {
+ longest_rep_len = lzx_repsearch(&c->cur_window[c->match_window_pos],
+ c->match_window_end - c->match_window_pos,
+ &c->queue,
+ &longest_rep_slot);
+ } else {
+ longest_rep_len = 0;
}
/* If there's a long match with a repeat offset, choose it immediately. */
}
do {
+ u32 cost;
unsigned len_header;
unsigned main_symbol;
- u32 cost;
cost = position_cost;
- len_header = min(len - LZX_MIN_MATCH_LEN, LZX_NUM_PRIMARY_LENS);
+ if (len - LZX_MIN_MATCH_LEN < LZX_NUM_PRIMARY_LENS) {
+ len_header = len - LZX_MIN_MATCH_LEN;
+ } else {
+ len_header = LZX_NUM_PRIMARY_LENS;
+ cost += c->costs.len[len - LZX_MIN_MATCH_LEN - LZX_NUM_PRIMARY_LENS];
+ }
+
main_symbol = ((position_slot << 3) | len_header) + LZX_NUM_CHARS;
cost += c->costs.main[main_symbol];
- if (len_header == LZX_NUM_PRIMARY_LENS)
- cost += c->costs.len[len - LZX_MIN_MATCH_LEN - LZX_NUM_PRIMARY_LENS];
optimum[len].queue = queue;
optimum[len].prev.link = 0;
}
end_pos = longest_len;
- if (longest_rep_len >= LZX_MIN_MATCH_LEN) {
+ if (longest_rep_len) {
+
+ LZX_ASSERT(longest_rep_len >= LZX_MIN_MATCH_LEN);
+
u32 cost;
while (end_pos < longest_rep_len)
/* Search for matches at repeat offsets. Again, as a heuristic
* we only keep the longest one. */
- longest_rep_len = LZX_MIN_MATCH_LEN - 1;
- unsigned limit = min(LZX_MAX_MATCH_LEN,
- c->match_window_end - c->match_window_pos);
- for (int i = 0; i < LZX_NUM_RECENT_OFFSETS; i++) {
- u32 offset = optimum[cur_pos].queue.R[i];
- const u8 *strptr = &c->cur_window[c->match_window_pos];
- const u8 *matchptr = strptr - offset;
- unsigned len = 0;
- while (len < limit && strptr[len] == matchptr[len])
- len++;
- if (len > longest_rep_len) {
- longest_rep_len = len;
- longest_rep_slot = i;
- }
- }
+ longest_rep_len = lzx_repsearch(&c->cur_window[c->match_window_pos],
+ c->match_window_end - c->match_window_pos,
+ &optimum[cur_pos].queue,
+ &longest_rep_slot);
/* If we found a long match at a repeat offset, choose it
* immediately. */
have_position_cost:
do {
+ u32 cost;
unsigned len_header;
unsigned main_symbol;
- u32 cost;
cost = position_cost;
- len_header = min(len - LZX_MIN_MATCH_LEN,
- LZX_NUM_PRIMARY_LENS);
- main_symbol = ((position_slot << 3) | len_header) +
- LZX_NUM_CHARS;
- cost += c->costs.main[main_symbol];
- if (len_header == LZX_NUM_PRIMARY_LENS) {
+ if (len - LZX_MIN_MATCH_LEN < LZX_NUM_PRIMARY_LENS) {
+ len_header = len - LZX_MIN_MATCH_LEN;
+ } else {
+ len_header = LZX_NUM_PRIMARY_LENS;
cost += c->costs.len[len -
LZX_MIN_MATCH_LEN -
LZX_NUM_PRIMARY_LENS];
}
+
+ main_symbol = ((position_slot << 3) | len_header) +
+ LZX_NUM_CHARS;
+ cost += c->costs.main[main_symbol];
+
if (cost < optimum[cur_pos + len].cost) {
if (position_slot < LZX_NUM_RECENT_OFFSETS) {
optimum[cur_pos + len].queue = optimum[cur_pos].queue;
* of the longest repeat offset match. Still didn't seem quite
* worth it, though.
*/
- if (longest_rep_len >= LZX_MIN_MATCH_LEN) {
+ if (longest_rep_len) {
+
+ LZX_ASSERT(longest_rep_len >= LZX_MIN_MATCH_LEN);
while (end_pos < cur_pos + longest_rep_len)
optimum[++end_pos].cost = MC_INFINITE_COST;