/*
* lzx-comp.c
*
- * LZX compression routines.
- *
- * This code was originally based on code written by Matthew T. Russotto
- * (liblzxcomp).
+ * LZX compression routines, originally based on code written by Matthew T.
+ * Russotto (liblzxcomp), but heavily modified.
*/
/*
*/
-
-/*
+/*
* This file provides lzx_compress(), a function to compress an in-memory buffer
* of data using LZX compression, as used in the WIM file format.
*
- * There is no sliding window, as for the compressed chunks in WIM resources,
- * the window is always the length of the input.
+ * Please see the comments in lzx-decomp.c for more information about this
+ * compression format.
*
- * The basic algorithm should be familiar if you are familiar with Huffman trees
- * and with other LZ77-based formats such as DEFLATE. Otherwise it can be quite
- * tricky to understand. Basically it is the following:
+ * One thing to keep in mind is that there is no sliding window, since the
+ * window is always the entirety of a WIM chunk, which is at most WIM_CHUNK_SIZE
+ * ( = 32768) bytes.
+ *
+ * The basic compression algorithm used here should be familiar if you are
+ * familiar with Huffman trees and with other LZ77 and Huffman-based formats
+ * such as DEFLATE. Otherwise it can be quite tricky to understand. Basically
+ * it is the following:
*
* - Preprocess the input data (LZX-specific)
- * - Go through the input data and determine matches. This part is based on
+ * - Go through the input data and determine matches. This part is based on
* code from zlib, and a hash table of 3-character strings is used to
* accelerate the process of finding matches.
* - Build the Huffman trees based on the frequencies of symbols determined
* while recording matches.
* - Output the block header, including the Huffman trees; then output the
* compressed stream of matches and literal characters.
+ *
+ * It is possible for a WIM chunk to include multiple LZX blocks, since for some
+ * input data this will produce a better compression ratio (especially since
+ * each block can include new Huffman codes). However, producing multiple LZX
+ * blocks from one input chunk is not yet implemented.
*/
-
#include "lzx.h"
#include "comp.h"
#include <math.h>
};
struct lzx_freq_tables {
- u32 main_freq_table[LZX_MAINTREE_NUM_SYMBOLS];
+ u32 main_freq_table[LZX_MAINTREE_NUM_SYMBOLS];
u32 len_freq_table[LZX_LENTREE_NUM_SYMBOLS];
u32 aligned_freq_table[LZX_ALIGNEDTREE_NUM_SYMBOLS];
};
-
-
-
-/* Returns the position slot that corresponds to a given formatted offset. This
- * means searching the lzx_position_base array to find what slot contains a
- * position base that is less than or equal to formatted_offset, where the next
- * slot contains a position base that is greater than or equal to
- * formatted_offset. */
-static uint lzx_get_position_slot(uint formatted_offset)
+/* Returns the LZX position slot that corresponds to a given formatted offset.
+ *
+ * Logically, this returns the smallest i such that
+ * formatted_offset >= lzx_position_base[i].
+ *
+ * The actual implementation below takes advantage of the regularity of the
+ * numbers in the lzx_position_base array to calculate the slot directly from
+ * the formatted offset without actually looking at the array.
+ */
+static inline unsigned lzx_get_position_slot(unsigned formatted_offset)
{
- int left;
- int right;
- int mid;
-
- /* Calculate position base using binary search of table; if log2 can be
- * done in hardware, approximation might work;
- * trunc(log2(formatted_offset*formatted_offset)) gets either the proper
- * position slot or the next one, except for slots 0, 1, and 39-49
- *
- * Slots 0-1 are handled by the R0-R1 procedures
- *
- * Slots 36-49 (formatted_offset >= 262144) can be found by
+#if 0
+ /*
+ * Slots 36-49 (formatted_offset >= 262144) can be found by
* (formatted_offset/131072) + 34 == (formatted_offset >> 17) + 34;
+ * however, this check for formatted_offset >= 262144 is commented out
+ * because WIM chunks cannot be that large.
*/
if (formatted_offset >= 262144) {
return (formatted_offset >> 17) + 34;
- } else {
- left = 3;
- right = LZX_NUM_POSITION_SLOTS - 1;
- while (1) {
- mid = (left + right) >> 1;
- if ((lzx_position_base[mid] <= formatted_offset) &&
- lzx_position_base[mid + 1] > formatted_offset) {
- return mid;
- }
- if (formatted_offset > lzx_position_base[mid])
- /* too low */
- left = mid + 1;
- else /* too high */
- right = mid;
- }
+ } else
+#endif
+ {
+ /* Note: this part here only works if:
+ *
+ * 2 <= formatted_offset < 655360
+ *
+ * It is < 655360 because the frequency of the position bases
+ * increases starting at the 655360 entry, and it is >= 2
+ * because the below calculation fails if the most significant
+ * bit is lower than the 2's place. */
+ wimlib_assert(formatted_offset >= 2 && formatted_offset < 655360);
+ unsigned mssb_idx = bsr32(formatted_offset);
+ return (mssb_idx << 1) |
+ ((formatted_offset >> (mssb_idx - 1)) & 1);
}
}
return literal;
}
-/* Constructs a match from an offset and a length, and updates the LRU queue
- * and the frequency of symbols in the main, length, and aligned offset
- * alphabets. The return value is a 32-bit integer that, if the high bit is
- * set, contains the match length, the position slot, and the position footer
- * for the match. */
-static u32 lzx_record_match(uint match_offset, uint match_len,
+/* Equivalent to lzx_extra_bits[position_slot] except position_slot must be
+ * between 2 and 37 */
+static inline unsigned lzx_get_num_extra_bits(unsigned position_slot)
+{
+#if 0
+ return lzx_extra_bits[position_slot];
+#endif
+ wimlib_assert(position_slot >= 2 && position_slot <= 37);
+ return (position_slot >> 1) - 1;
+}
+
+/* Constructs a match from an offset and a length, and updates the LRU queue and
+ * the frequency of symbols in the main, length, and aligned offset alphabets.
+ * The return value is a 32-bit number that provides the match in an
+ * intermediate representation documented below. */
+static u32 lzx_record_match(unsigned match_offset, unsigned match_len,
void *__freq_tabs, void *__queue)
{
struct lzx_freq_tables *freq_tabs = __freq_tabs;
struct lru_queue *queue = __queue;
- uint formatted_offset;
- uint position_slot;
- uint position_footer = 0;
+ unsigned formatted_offset;
+ unsigned position_slot;
+ unsigned position_footer = 0;
u32 match;
u32 len_header;
u32 len_pos_header;
- uint len_footer;
+ unsigned len_footer;
+ unsigned adjusted_match_len;
wimlib_assert(match_len >= LZX_MIN_MATCH && match_len <= LZX_MAX_MATCH);
+ wimlib_assert(match_offset != 0);
-
+ /* If possible, encode this offset as a repeated offset. */
if (match_offset == queue->R0) {
formatted_offset = 0;
position_slot = 0;
} else {
/* Not a repeated offset. */
+ /* offsets of 0, 1, and 2 are reserved for the repeated offset
+ * codes, so non-repeated offsets must be encoded as 3+. The
+ * minimum offset is 1, so encode the offsets offset by 2. */
formatted_offset = match_offset + LZX_MIN_MATCH;
queue->R2 = queue->R1;
queue->R1 = queue->R0;
queue->R0 = match_offset;
- position_slot = lzx_get_position_slot(formatted_offset);
+ /* The (now-formatted) offset will actually be encoded as a
+ * small position slot number that maps to a certain hard-coded
+ * offset (position base), followed by a number of extra bits---
+ * the position footer--- that are added to the position base to
+ * get the original formatted offset. */
- /* Just the extra bits of the formatted offset. */
- position_footer = ((1UL << lzx_extra_bits[position_slot]) - 1) &
- formatted_offset;
+ position_slot = lzx_get_position_slot(formatted_offset);
+ position_footer = formatted_offset &
+ ((1 << lzx_get_num_extra_bits(position_slot)) - 1);
}
- /* (match length - 2) = 8 bits */
- /* position_slot = 6 bits */
- /* position_footer = 17 bits */
- /* total = 31 bits */
- /* plus one to say whether it's a literal or not */
-
- match = 0x80000000 | /* bit 31 in intelligent bit ordering */
- (position_slot << 25) | /* bits 30-25 */
- (position_footer << 8) | /* bits 8-24 */
- (match_len - LZX_MIN_MATCH); /* bits 0-7 */
-
- /* Update the frequency for the main tree, the length tree (only if a
- * length symbol is to be output), and the aligned tree (only if an
- * aligned symbol is to be output.) */
- if (match_len < (LZX_NUM_PRIMARY_LENS + LZX_MIN_MATCH)) {
- len_header = match_len - LZX_MIN_MATCH;
+ adjusted_match_len = match_len - LZX_MIN_MATCH;
+
+ /* Pack the position slot, position footer, and match length into an
+ * intermediate representation.
+ *
+ * bits description
+ * ---- -----------------------------------------------------------
+ *
+ * 31 1 if a match, 0 if a literal.
+ *
+ * 30-25 position slot. This can be at most 50, so it will fit in 6
+ * bits.
+ *
+ * 8-24 position footer. This is the offset of the real formatted
+ * offset from the position base. This can be at most 17 bits
+ * (since lzx_extra_bits[LZX_NUM_POSITION_SLOTS - 1] is 17).
+ *
+ * 0-7 length of match, offset by 2. This can be at most
+ * (LZX_MAX_MATCH - 2) == 255, so it will fit in 8 bits. */
+ match = 0x80000000 |
+ (position_slot << 25) |
+ (position_footer << 8) |
+ (adjusted_match_len);
+
+ /* The match length must be at least 2, so let the adjusted match length
+ * be the match length minus 2.
+ *
+ * If it is less than 7, the adjusted match length is encoded as a 3-bit
+ * number offset by 2. Otherwise, the 3-bit length header is all 1's
+ * and the actual adjusted length is given as a symbol encoded with the
+ * length tree, offset by 7.
+ */
+ if (adjusted_match_len < LZX_NUM_PRIMARY_LENS) {
+ len_header = adjusted_match_len;
} else {
len_header = LZX_NUM_PRIMARY_LENS;
- len_footer = match_len - (LZX_NUM_PRIMARY_LENS + LZX_MIN_MATCH);
+ len_footer = adjusted_match_len - LZX_NUM_PRIMARY_LENS;
freq_tabs->len_freq_table[len_footer]++;
}
len_pos_header = (position_slot << 3) | len_header;
freq_tabs->main_freq_table[len_pos_header + LZX_NUM_CHARS]++;
- if (lzx_extra_bits[position_slot] >= 3)
+ /* Equivalent to:
+ * if (lzx_extra_bits[position_slot] >= 3) */
+ if (position_slot >= 8)
freq_tabs->aligned_freq_table[position_footer & 7]++;
return match;
}
-/*
+/*
* Writes a compressed literal match to the output.
*
* @out: The output bitstream.
* main, length, and aligned offset Huffman codes.
*/
static int lzx_write_match(struct output_bitstream *out, int block_type,
- u32 match, const struct lzx_codes *codes)
+ u32 match, const struct lzx_codes *codes)
{
/* low 8 bits are the match length minus 2 */
- uint match_len_minus_2 = match & 0xff;
+ unsigned match_len_minus_2 = match & 0xff;
/* Next 17 bits are the position footer */
- uint position_footer = (match >> 8) & 0x1ffff; /* 17 bits */
+ unsigned position_footer = (match >> 8) & 0x1ffff; /* 17 bits */
/* Next 6 bits are the position slot. */
- uint position_slot = (match >> 25) & 0x3f; /* 6 bits */
- uint len_header;
- uint len_footer;
- uint len_pos_header;
- uint main_symbol;
- uint num_extra_bits;
- uint verbatim_bits;
- uint aligned_bits;
+ unsigned position_slot = (match >> 25) & 0x3f; /* 6 bits */
+ unsigned len_header;
+ unsigned len_footer;
+ unsigned len_pos_header;
+ unsigned main_symbol;
+ unsigned num_extra_bits;
+ unsigned verbatim_bits;
+ unsigned aligned_bits;
int ret;
/* If the match length is less than MIN_MATCH (= 2) +
len_header = match_len_minus_2;
/* No length footer-- mark it with a special
* value. */
- len_footer = (uint)(-1);
+ len_footer = (unsigned)(-1);
} else {
len_header = LZX_NUM_PRIMARY_LENS;
len_footer = match_len_minus_2 - LZX_NUM_PRIMARY_LENS;
main_symbol = len_pos_header + LZX_NUM_CHARS;
/* Output main symbol. */
- ret = bitstream_put_bits(out, codes->main_codewords[main_symbol],
+ ret = bitstream_put_bits(out, codes->main_codewords[main_symbol],
codes->main_lens[main_symbol]);
if (ret != 0)
return ret;
/* If there is a length footer, output it using the
* length Huffman code. */
- if (len_footer != (uint)(-1)) {
+ if (len_footer != (unsigned)(-1)) {
ret = bitstream_put_bits(out, codes->len_codewords[len_footer],
codes->len_lens[len_footer]);
if (ret != 0)
* aligned offset tree. Otherwise, only the verbatim bits need to be
* output. */
if ((block_type == LZX_BLOCKTYPE_ALIGNED) && (num_extra_bits >= 3)) {
-
+
verbatim_bits = position_footer >> 3;
- ret = bitstream_put_bits(out, verbatim_bits,
+ ret = bitstream_put_bits(out, verbatim_bits,
num_extra_bits - 3);
if (ret != 0)
return ret;
aligned_bits = (position_footer & 7);
- ret = bitstream_put_bits(out,
+ ret = bitstream_put_bits(out,
codes->aligned_codewords[aligned_bits],
codes->aligned_lens[aligned_bits]);
if (ret != 0)
return 0;
}
-/*
+/*
* Writes all compressed literals in a block, both matches and literal bytes, to
* the output bitstream.
*
* @codes: Pointer to a structure that contains the codewords for the
* main, length, and aligned offset Huffman codes.
*/
-static int lzx_write_compressed_literals(struct output_bitstream *ostream,
+static int lzx_write_compressed_literals(struct output_bitstream *ostream,
int block_type,
- const u32 match_tab[],
- uint num_compressed_literals,
+ const u32 match_tab[],
+ unsigned num_compressed_literals,
const struct lzx_codes *codes)
{
- uint i;
+ unsigned i;
u32 match;
int ret;
* actual match (1) or a literal uncompressed byte (0) */
if (match & 0x80000000) {
/* match */
- ret = lzx_write_match(ostream, block_type, match,
+ ret = lzx_write_match(ostream, block_type, match,
codes);
if (ret != 0)
return ret;
} else {
/* literal byte */
wimlib_assert(match < LZX_NUM_CHARS);
- ret = bitstream_put_bits(ostream,
+ ret = bitstream_put_bits(ostream,
codes->main_codewords[match],
codes->main_lens[match]);
if (ret != 0)
return 0;
}
-/*
+/*
* Writes a compressed Huffman tree to the output, preceded by the pretree for
* it.
*
* @lens: The code lengths for the Huffman tree, indexed by symbol.
* @num_symbols: The number of symbols in the code.
*/
-static int lzx_write_compressed_tree(struct output_bitstream *out,
- const u8 lens[],
- uint num_symbols)
+static int lzx_write_compressed_tree(struct output_bitstream *out,
+ const u8 lens[], unsigned num_symbols)
{
/* Frequencies of the length symbols, including the RLE symbols (NOT the
* actual lengths themselves). */
- uint pretree_freqs[LZX_PRETREE_NUM_SYMBOLS];
+ unsigned pretree_freqs[LZX_PRETREE_NUM_SYMBOLS];
u8 pretree_lens[LZX_PRETREE_NUM_SYMBOLS];
u16 pretree_codewords[LZX_PRETREE_NUM_SYMBOLS];
u8 output_syms[num_symbols * 2];
- uint output_syms_idx;
- uint cur_run_len;
- uint i;
- uint len_in_run;
- uint additional_bits;
+ unsigned output_syms_idx;
+ unsigned cur_run_len;
+ unsigned i;
+ unsigned len_in_run;
+ unsigned additional_bits;
char delta;
u8 pretree_sym;
/* Build the pretree from the frequencies of the length symbols. */
- make_canonical_huffman_code(LZX_PRETREE_NUM_SYMBOLS,
- LZX_MAX_CODEWORD_LEN,
- pretree_freqs, pretree_lens,
+ make_canonical_huffman_code(LZX_PRETREE_NUM_SYMBOLS,
+ LZX_MAX_CODEWORD_LEN,
+ pretree_freqs, pretree_lens,
pretree_codewords);
/* Write the lengths of the pretree codes to the output. */
for (i = 0; i < LZX_PRETREE_NUM_SYMBOLS; i++)
- bitstream_put_bits(out, pretree_lens[i],
+ bitstream_put_bits(out, pretree_lens[i],
LZX_PRETREE_ELEMENT_SIZE);
/* Write the length symbols, encoded with the pretree, to the output. */
while (i < output_syms_idx) {
pretree_sym = output_syms[i++];
- bitstream_put_bits(out, pretree_codewords[pretree_sym],
+ bitstream_put_bits(out, pretree_codewords[pretree_sym],
pretree_lens[pretree_sym]);
switch (pretree_sym) {
case 17:
break;
case 19:
bitstream_put_bits(out, output_syms[i++], 1);
- bitstream_put_bits(out,
+ bitstream_put_bits(out,
pretree_codewords[output_syms[i]],
pretree_lens[output_syms[i]]);
i++;
static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs,
struct lzx_codes *codes)
{
- make_canonical_huffman_code(LZX_MAINTREE_NUM_SYMBOLS,
+ make_canonical_huffman_code(LZX_MAINTREE_NUM_SYMBOLS,
LZX_MAX_CODEWORD_LEN,
- freq_tabs->main_freq_table,
+ freq_tabs->main_freq_table,
codes->main_lens,
codes->main_codewords);
- make_canonical_huffman_code(LZX_LENTREE_NUM_SYMBOLS,
+ make_canonical_huffman_code(LZX_LENTREE_NUM_SYMBOLS,
LZX_MAX_CODEWORD_LEN,
- freq_tabs->len_freq_table,
+ freq_tabs->len_freq_table,
codes->len_lens,
codes->len_codewords);
make_canonical_huffman_code(LZX_ALIGNEDTREE_NUM_SYMBOLS, 8,
- freq_tabs->aligned_freq_table,
+ freq_tabs->aligned_freq_table,
codes->aligned_lens,
codes->aligned_codewords);
}
* no bit to indicate that it actually is used, unlike in the LZX compressed
* format as used in other file formats such as the cabinet format, where a bit
* is reserved for that purpose. */
-static void do_call_insn_preprocessing(u8 uncompressed_data[],
- uint uncompressed_data_len)
+static void do_call_insn_preprocessing(u8 uncompressed_data[],
+ unsigned uncompressed_data_len)
{
int i = 0;
int file_size = LZX_MAGIC_FILESIZE;
/* Not enabled in the last 6 bytes, which means the 5-byte call
* instruction cannot start in the last *10* bytes. */
- while (i < uncompressed_data_len - 10) {
+ while (i < uncompressed_data_len - 10) {
if (uncompressed_data[i] != 0xe8) {
i++;
continue;
}
- rel_offset = to_le32(*(int32_t*)(uncompressed_data + i + 1));
+ rel_offset = le32_to_cpu(*(int32_t*)(uncompressed_data + i + 1));
if (rel_offset >= -i && rel_offset < file_size) {
if (rel_offset < file_size - i) {
/* "compensating translation" */
abs_offset = rel_offset - file_size;
}
- *(int32_t*)(uncompressed_data + i + 1) = to_le32(abs_offset);
+ *(int32_t*)(uncompressed_data + i + 1) = cpu_to_le32(abs_offset);
}
i += 5;
}
static const struct lz_params lzx_lz_params = {
+
+ /* LZX_MIN_MATCH == 2, but 2-character matches are rarely useful; the
+ * minimum match for compression is set to 3 instead. */
.min_match = 3,
+
.max_match = LZX_MAX_MATCH,
.good_match = LZX_MAX_MATCH,
.nice_match = LZX_MAX_MATCH,
.too_far = 4096,
};
-/*
+/*
* Performs LZX compression on a block of data.
*
* @__uncompressed_data: Pointer to the data to be compressed.
* @compressed_data and @compressed_len_ret will contain the compressed data and
* its length. A return value of nonzero means that compressing the data did
* not reduce its size, and @compressed_data will not contain the full
- * compressed data.
+ * compressed data.
*/
-int lzx_compress(const void *__uncompressed_data, uint uncompressed_len,
- void *compressed_data, uint *compressed_len_ret)
+int lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len,
+ void *compressed_data, unsigned *compressed_len_ret)
{
struct output_bitstream ostream;
u8 uncompressed_data[uncompressed_len + LZX_MAX_MATCH];
struct lzx_freq_tables freq_tabs;
struct lzx_codes codes;
u32 match_tab[uncompressed_len];
- struct lru_queue queue = {.R0 = 1, .R1 = 1, .R2 = 1};
- uint num_matches;
- uint compressed_len;
- uint i;
+ struct lru_queue queue;
+ unsigned num_matches;
+ unsigned compressed_len;
+ unsigned i;
int ret;
int block_type = LZX_BLOCKTYPE_ALIGNED;
- LZX_DEBUG("uncompressed_len = %u", uncompressed_len);
-
if (uncompressed_len < 100)
return 1;
-
memset(&freq_tabs, 0, sizeof(freq_tabs));
+ queue.R0 = 1;
+ queue.R1 = 1;
+ queue.R2 = 1;
/* The input data must be preprocessed. To avoid changing the original
* input, copy it to a temporary buffer. */
memcpy(uncompressed_data, __uncompressed_data, uncompressed_len);
-
/* Before doing any actual compression, do the call instruction (0xe8
* byte) translation on the uncompressed data. */
do_call_insn_preprocessing(uncompressed_data, uncompressed_len);
-
/* Determine the sequence of matches and literals that will be output,
* and in the process, keep counts of the number of times each symbol
* will be output, so that the Huffman trees can be made. */
&queue, freq_tabs.main_freq_table,
&lzx_lz_params);
- LZX_DEBUG("using %u matches", num_matches);
-
lzx_make_huffman_codes(&freq_tabs, &codes);
/* Initialize the output bitstream. */
/* Write the pre-tree and lengths for the first LZX_NUM_CHARS symbols in the
* main tree. */
- ret = lzx_write_compressed_tree(&ostream, codes.main_lens,
+ ret = lzx_write_compressed_tree(&ostream, codes.main_lens,
LZX_NUM_CHARS);
if (ret != 0)
return ret;
/* Write the pre-tree and symbols for the rest of the main tree. */
- ret = lzx_write_compressed_tree(&ostream, codes.main_lens +
- LZX_NUM_CHARS,
- LZX_MAINTREE_NUM_SYMBOLS -
+ ret = lzx_write_compressed_tree(&ostream, codes.main_lens +
+ LZX_NUM_CHARS,
+ LZX_MAINTREE_NUM_SYMBOLS -
LZX_NUM_CHARS);
if (ret != 0)
return ret;
/* Write the pre-tree and symbols for the length tree. */
- ret = lzx_write_compressed_tree(&ostream, codes.len_lens,
+ ret = lzx_write_compressed_tree(&ostream, codes.len_lens,
LZX_LENTREE_NUM_SYMBOLS);
if (ret != 0)
return ret;
compressed_len = ostream.bit_output - (u8*)compressed_data;
- LZX_DEBUG("Compressed %u => %u bytes",
- uncompressed_len, compressed_len);
-
*compressed_len_ret = compressed_len;
#ifdef ENABLE_VERIFY_COMPRESSION
/* Verify that we really get the same thing back when decompressing. */
- LZX_DEBUG("Verifying the compressed data.");
u8 buf[uncompressed_len];
- ret = lzx_decompress(compressed_data, compressed_len, buf,
+ ret = lzx_decompress(compressed_data, compressed_len, buf,
uncompressed_len);
if (ret != 0) {
ERROR("lzx_compress(): Failed to decompress data we compressed");
abort();
}
}
- LZX_DEBUG("Compression verified to be correct.");
#endif
-
return 0;
}