X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Flzx-comp.c;h=899d1dada6942ce83850a4706f35fb5d25228805;hb=edd7e04ddef20c47975bd2535960b6223496d99b;hp=9115ddf90b44a501437c86a1c4a43c63cc428094;hpb=6f77434ea6ff1407603410e28d1edb966c40e568;p=wimlib diff --git a/src/lzx-comp.c b/src/lzx-comp.c index 9115ddf9..899d1dad 100644 --- a/src/lzx-comp.c +++ b/src/lzx-comp.c @@ -1,7 +1,7 @@ /* * lzx-comp.c * - * LZX compression routines. + * LZX compression routines. * * This code was originally based on code written by Matthew T. Russotto * (liblzxcomp). @@ -14,22 +14,22 @@ * This file is part of wimlib, a library for working with WIM files. * * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) + * terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) * any later version. * * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * - * You should have received a copy of the GNU Lesser General Public License + * You should have received a copy of the GNU General Public License * along with wimlib; if not, see http://www.gnu.org/licenses/. */ -/* +/* * This file provides lzx_compress(), a function to compress an in-memory buffer * of data using LZX compression, as used in the WIM file format. * @@ -41,7 +41,7 @@ * tricky to understand. Basically it is the following: * * - Preprocess the input data (LZX-specific) - * - Go through the input data and determine matches. This part is based on + * - Go through the input data and determine matches. This part is based on * code from zlib, and a hash table of 3-character strings is used to * accelerate the process of finding matches. * - Build the Huffman trees based on the frequencies of symbols determined @@ -72,7 +72,7 @@ struct lzx_codes { }; struct lzx_freq_tables { - u32 main_freq_table[LZX_MAINTREE_NUM_SYMBOLS]; + u32 main_freq_table[LZX_MAINTREE_NUM_SYMBOLS]; u32 len_freq_table[LZX_LENTREE_NUM_SYMBOLS]; u32 aligned_freq_table[LZX_ALIGNEDTREE_NUM_SYMBOLS]; }; @@ -92,13 +92,13 @@ static uint lzx_get_position_slot(uint formatted_offset) int mid; /* Calculate position base using binary search of table; if log2 can be - * done in hardware, approximation might work; + * done in hardware, approximation might work; * trunc(log2(formatted_offset*formatted_offset)) gets either the proper * position slot or the next one, except for slots 0, 1, and 39-49 * * Slots 0-1 are handled by the R0-R1 procedures * - * Slots 36-49 (formatted_offset >= 262144) can be found by + * Slots 36-49 (formatted_offset >= 262144) can be found by * (formatted_offset/131072) + 34 == (formatted_offset >> 17) + 34; */ if (formatted_offset >= 262144) { @@ -133,7 +133,7 @@ static u32 lzx_record_literal(u8 literal, void *__main_freq_tab) * alphabets. The return value is a 32-bit integer that, if the high bit is * set, contains the match length, the position slot, and the position footer * for the match. */ -static u32 lzx_record_match(uint match_offset, uint match_len, +static u32 lzx_record_match(uint match_offset, uint match_len, void *__freq_tabs, void *__queue) { struct lzx_freq_tables *freq_tabs = __freq_tabs; @@ -209,7 +209,7 @@ static u32 lzx_record_match(uint match_offset, uint match_len, return match; } -/* +/* * Writes a compressed literal match to the output. * * @out: The output bitstream. @@ -266,7 +266,7 @@ static int lzx_write_match(struct output_bitstream *out, int block_type, main_symbol = len_pos_header + LZX_NUM_CHARS; /* Output main symbol. */ - ret = bitstream_put_bits(out, codes->main_codewords[main_symbol], + ret = bitstream_put_bits(out, codes->main_codewords[main_symbol], codes->main_lens[main_symbol]); if (ret != 0) return ret; @@ -289,15 +289,15 @@ static int lzx_write_match(struct output_bitstream *out, int block_type, * aligned offset tree. Otherwise, only the verbatim bits need to be * output. */ if ((block_type == LZX_BLOCKTYPE_ALIGNED) && (num_extra_bits >= 3)) { - + verbatim_bits = position_footer >> 3; - ret = bitstream_put_bits(out, verbatim_bits, + ret = bitstream_put_bits(out, verbatim_bits, num_extra_bits - 3); if (ret != 0) return ret; aligned_bits = (position_footer & 7); - ret = bitstream_put_bits(out, + ret = bitstream_put_bits(out, codes->aligned_codewords[aligned_bits], codes->aligned_lens[aligned_bits]); if (ret != 0) @@ -312,7 +312,7 @@ static int lzx_write_match(struct output_bitstream *out, int block_type, return 0; } -/* +/* * Writes all compressed literals in a block, both matches and literal bytes, to * the output bitstream. * @@ -324,9 +324,9 @@ static int lzx_write_match(struct output_bitstream *out, int block_type, * @codes: Pointer to a structure that contains the codewords for the * main, length, and aligned offset Huffman codes. */ -static int lzx_write_compressed_literals(struct output_bitstream *ostream, +static int lzx_write_compressed_literals(struct output_bitstream *ostream, int block_type, - const u32 match_tab[], + const u32 match_tab[], uint num_compressed_literals, const struct lzx_codes *codes) { @@ -341,14 +341,14 @@ static int lzx_write_compressed_literals(struct output_bitstream *ostream, * actual match (1) or a literal uncompressed byte (0) */ if (match & 0x80000000) { /* match */ - ret = lzx_write_match(ostream, block_type, match, + ret = lzx_write_match(ostream, block_type, match, codes); if (ret != 0) return ret; } else { /* literal byte */ wimlib_assert(match < LZX_NUM_CHARS); - ret = bitstream_put_bits(ostream, + ret = bitstream_put_bits(ostream, codes->main_codewords[match], codes->main_lens[match]); if (ret != 0) @@ -358,7 +358,7 @@ static int lzx_write_compressed_literals(struct output_bitstream *ostream, return 0; } -/* +/* * Writes a compressed Huffman tree to the output, preceded by the pretree for * it. * @@ -374,8 +374,8 @@ static int lzx_write_compressed_literals(struct output_bitstream *ostream, * @lens: The code lengths for the Huffman tree, indexed by symbol. * @num_symbols: The number of symbols in the code. */ -static int lzx_write_compressed_tree(struct output_bitstream *out, - const u8 lens[], +static int lzx_write_compressed_tree(struct output_bitstream *out, + const u8 lens[], uint num_symbols) { /* Frequencies of the length symbols, including the RLE symbols (NOT the @@ -470,7 +470,7 @@ static int lzx_write_compressed_tree(struct output_bitstream *out, if (delta < 0) delta += 17; pretree_freqs[19]++; - pretree_freqs[delta]++; + pretree_freqs[(unsigned char)delta]++; output_syms[output_syms_idx++] = 19; output_syms[output_syms_idx++] = additional_bits; output_syms[output_syms_idx++] = delta; @@ -486,7 +486,7 @@ static int lzx_write_compressed_tree(struct output_bitstream *out, if (delta < 0) delta += 17; - pretree_freqs[delta]++; + pretree_freqs[(unsigned char)delta]++; output_syms[output_syms_idx++] = delta; } @@ -497,14 +497,14 @@ static int lzx_write_compressed_tree(struct output_bitstream *out, /* Build the pretree from the frequencies of the length symbols. */ - make_canonical_huffman_code(LZX_PRETREE_NUM_SYMBOLS, - LZX_MAX_CODEWORD_LEN, - pretree_freqs, pretree_lens, + make_canonical_huffman_code(LZX_PRETREE_NUM_SYMBOLS, + LZX_MAX_CODEWORD_LEN, + pretree_freqs, pretree_lens, pretree_codewords); /* Write the lengths of the pretree codes to the output. */ for (i = 0; i < LZX_PRETREE_NUM_SYMBOLS; i++) - bitstream_put_bits(out, pretree_lens[i], + bitstream_put_bits(out, pretree_lens[i], LZX_PRETREE_ELEMENT_SIZE); /* Write the length symbols, encoded with the pretree, to the output. */ @@ -513,7 +513,7 @@ static int lzx_write_compressed_tree(struct output_bitstream *out, while (i < output_syms_idx) { pretree_sym = output_syms[i++]; - bitstream_put_bits(out, pretree_codewords[pretree_sym], + bitstream_put_bits(out, pretree_codewords[pretree_sym], pretree_lens[pretree_sym]); switch (pretree_sym) { case 17: @@ -524,7 +524,7 @@ static int lzx_write_compressed_tree(struct output_bitstream *out, break; case 19: bitstream_put_bits(out, output_syms[i++], 1); - bitstream_put_bits(out, + bitstream_put_bits(out, pretree_codewords[output_syms[i]], pretree_lens[output_syms[i]]); i++; @@ -541,20 +541,20 @@ static int lzx_write_compressed_tree(struct output_bitstream *out, static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs, struct lzx_codes *codes) { - make_canonical_huffman_code(LZX_MAINTREE_NUM_SYMBOLS, + make_canonical_huffman_code(LZX_MAINTREE_NUM_SYMBOLS, LZX_MAX_CODEWORD_LEN, - freq_tabs->main_freq_table, + freq_tabs->main_freq_table, codes->main_lens, codes->main_codewords); - make_canonical_huffman_code(LZX_LENTREE_NUM_SYMBOLS, + make_canonical_huffman_code(LZX_LENTREE_NUM_SYMBOLS, LZX_MAX_CODEWORD_LEN, - freq_tabs->len_freq_table, + freq_tabs->len_freq_table, codes->len_lens, codes->len_codewords); make_canonical_huffman_code(LZX_ALIGNEDTREE_NUM_SYMBOLS, 8, - freq_tabs->aligned_freq_table, + freq_tabs->aligned_freq_table, codes->aligned_lens, codes->aligned_codewords); } @@ -566,7 +566,7 @@ static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs, * no bit to indicate that it actually is used, unlike in the LZX compressed * format as used in other file formats such as the cabinet format, where a bit * is reserved for that purpose. */ -static void do_call_insn_preprocessing(u8 uncompressed_data[], +static void do_call_insn_preprocessing(u8 uncompressed_data[], uint uncompressed_data_len) { int i = 0; @@ -576,12 +576,12 @@ static void do_call_insn_preprocessing(u8 uncompressed_data[], /* Not enabled in the last 6 bytes, which means the 5-byte call * instruction cannot start in the last *10* bytes. */ - while (i < uncompressed_data_len - 10) { + while (i < uncompressed_data_len - 10) { if (uncompressed_data[i] != 0xe8) { i++; continue; } - rel_offset = to_le32(*(int32_t*)(uncompressed_data + i + 1)); + rel_offset = le32_to_cpu(*(int32_t*)(uncompressed_data + i + 1)); if (rel_offset >= -i && rel_offset < file_size) { if (rel_offset < file_size - i) { @@ -591,7 +591,7 @@ static void do_call_insn_preprocessing(u8 uncompressed_data[], /* "compensating translation" */ abs_offset = rel_offset - file_size; } - *(int32_t*)(uncompressed_data + i + 1) = to_le32(abs_offset); + *(int32_t*)(uncompressed_data + i + 1) = cpu_to_le32(abs_offset); } i += 5; } @@ -599,7 +599,11 @@ static void do_call_insn_preprocessing(u8 uncompressed_data[], static const struct lz_params lzx_lz_params = { + + /* LZX_MIN_MATCH == 2, but 2-character matches are rarely useful; the + * minimum match for compression is set to 3 instead. */ .min_match = 3, + .max_match = LZX_MAX_MATCH, .good_match = LZX_MAX_MATCH, .nice_match = LZX_MAX_MATCH, @@ -608,7 +612,7 @@ static const struct lz_params lzx_lz_params = { .too_far = 4096, }; -/* +/* * Performs LZX compression on a block of data. * * @__uncompressed_data: Pointer to the data to be compressed. @@ -623,7 +627,7 @@ static const struct lz_params lzx_lz_params = { * @compressed_data and @compressed_len_ret will contain the compressed data and * its length. A return value of nonzero means that compressing the data did * not reduce its size, and @compressed_data will not contain the full - * compressed data. + * compressed data. */ int lzx_compress(const void *__uncompressed_data, uint uncompressed_len, void *compressed_data, uint *compressed_len_ret) @@ -638,8 +642,9 @@ int lzx_compress(const void *__uncompressed_data, uint uncompressed_len, uint compressed_len; uint i; int ret; + int block_type = LZX_BLOCKTYPE_ALIGNED; - LZX_DEBUG("uncompressed_len = %u\n", uncompressed_len); + LZX_DEBUG("uncompressed_len = %u", uncompressed_len); if (uncompressed_len < 100) return 1; @@ -667,8 +672,7 @@ int lzx_compress(const void *__uncompressed_data, uint uncompressed_len, &queue, freq_tabs.main_freq_table, &lzx_lz_params); - LZX_DEBUG("using %u matches\n", num_matches); - + LZX_DEBUG("using %u matches", num_matches); lzx_make_huffman_codes(&freq_tabs, &codes); @@ -677,7 +681,7 @@ int lzx_compress(const void *__uncompressed_data, uint uncompressed_len, /* The first three bits tell us what kind of block it is, and are one * of the LZX_BLOCKTYPE_* values. */ - bitstream_put_bits(&ostream, LZX_BLOCKTYPE_ALIGNED, 3); + bitstream_put_bits(&ostream, block_type, 3); /* The next bit indicates whether the block size is the default (32768), * indicated by a 1 bit, or whether the block size is given by the next @@ -692,33 +696,34 @@ int lzx_compress(const void *__uncompressed_data, uint uncompressed_len, /* Write out the aligned offset tree. Note that M$ lies and says that * the aligned offset tree comes after the length tree, but that is * wrong; it actually is before the main tree. */ - for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++) - bitstream_put_bits(&ostream, codes.aligned_lens[i], - LZX_ALIGNEDTREE_ELEMENT_SIZE); + if (block_type == LZX_BLOCKTYPE_ALIGNED) + for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++) + bitstream_put_bits(&ostream, codes.aligned_lens[i], + LZX_ALIGNEDTREE_ELEMENT_SIZE); /* Write the pre-tree and lengths for the first LZX_NUM_CHARS symbols in the * main tree. */ - ret = lzx_write_compressed_tree(&ostream, codes.main_lens, + ret = lzx_write_compressed_tree(&ostream, codes.main_lens, LZX_NUM_CHARS); if (ret != 0) return ret; /* Write the pre-tree and symbols for the rest of the main tree. */ - ret = lzx_write_compressed_tree(&ostream, codes.main_lens + - LZX_NUM_CHARS, - LZX_MAINTREE_NUM_SYMBOLS - + ret = lzx_write_compressed_tree(&ostream, codes.main_lens + + LZX_NUM_CHARS, + LZX_MAINTREE_NUM_SYMBOLS - LZX_NUM_CHARS); if (ret != 0) return ret; /* Write the pre-tree and symbols for the length tree. */ - ret = lzx_write_compressed_tree(&ostream, codes.len_lens, + ret = lzx_write_compressed_tree(&ostream, codes.len_lens, LZX_LENTREE_NUM_SYMBOLS); if (ret != 0) return ret; /* Write the compressed literals. */ - ret = lzx_write_compressed_literals(&ostream, LZX_BLOCKTYPE_ALIGNED, + ret = lzx_write_compressed_literals(&ostream, block_type, match_tab, num_matches, &codes); if (ret != 0) return ret; @@ -729,31 +734,31 @@ int lzx_compress(const void *__uncompressed_data, uint uncompressed_len, compressed_len = ostream.bit_output - (u8*)compressed_data; - LZX_DEBUG("Compressed %u => %u bytes\n", - uncompressed_len, compressed_len); + LZX_DEBUG("Compressed %u => %u bytes", + uncompressed_len, compressed_len); *compressed_len_ret = compressed_len; #ifdef ENABLE_VERIFY_COMPRESSION /* Verify that we really get the same thing back when decompressing. */ + LZX_DEBUG("Verifying the compressed data."); u8 buf[uncompressed_len]; - ret = lzx_decompress(compressed_data, compressed_len, buf, + ret = lzx_decompress(compressed_data, compressed_len, buf, uncompressed_len); if (ret != 0) { - ERROR("ERROR: Failed to decompress data we compressed!\n"); - exit(0); + ERROR("lzx_compress(): Failed to decompress data we compressed"); abort(); } for (i = 0; i < uncompressed_len; i++) { if (buf[i] != *((u8*)__uncompressed_data + i)) { - ERROR("Data we compressed didn't decompress to " - "the original data (difference at byte %u of " - "%u)\n", i + 1, uncompressed_len); + ERROR("lzx_compress(): Data we compressed didn't " + "decompress to the original data (difference at " + "byte %u of %u)", i + 1, uncompressed_len); abort(); } } - LZX_DEBUG("Compression verified to be correct.\n"); + LZX_DEBUG("Compression verified to be correct."); #endif return 0;