X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzx-compress.c;h=61c484318179a383b2f5f1af4dc9bc58b10d67cd;hp=effe656bf20fcd14f65c780e13b7fc78c1d31483;hb=db7cd2644605566eefc11c0c992fce670938687c;hpb=d66b5c805c4e9a660bac6f979d88c1820cb031f2 diff --git a/src/lzx-compress.c b/src/lzx-compress.c index effe656b..61c48431 100644 --- a/src/lzx-compress.c +++ b/src/lzx-compress.c @@ -7,7 +7,7 @@ /* * Copyright (C) 2002 Matthew T. Russotto - * Copyright (C) 2012 Eric Biggers + * Copyright (C) 2012, 2013 Eric Biggers * * This file is part of wimlib, a library for working with WIM files. * @@ -27,8 +27,8 @@ /* - * This file provides lzx_compress(), a function to compress an in-memory buffer - * of data using LZX compression, as used in the WIM file format. + * This file provides wimlib_lzx_compress(), a function to compress an in-memory + * buffer of data using LZX compression, as used in the WIM file format. * * Please see the comments in lzx-decompress.c for more information about this * compression format. @@ -57,8 +57,16 @@ * blocks from one input chunk is not yet implemented. */ -#include "lzx.h" -#include "compress.h" +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wimlib.h" +#include "wimlib/compress.h" +#include "wimlib/error.h" +#include "wimlib/lzx.h" +#include "wimlib/util.h" + #include #include @@ -91,7 +99,8 @@ struct lzx_freq_tables { * numbers in the lzx_position_base array to calculate the slot directly from * the formatted offset without actually looking at the array. */ -static inline unsigned lzx_get_position_slot(unsigned formatted_offset) +static inline unsigned +lzx_get_position_slot(unsigned formatted_offset) { #if 0 /* @@ -120,9 +129,10 @@ static inline unsigned lzx_get_position_slot(unsigned formatted_offset) } } -static u32 lzx_record_literal(u8 literal, void *__main_freq_tab) +static u32 +lzx_record_literal(u8 literal, void *_main_freq_tab) { - freq_t *main_freq_tab = __main_freq_tab; + freq_t *main_freq_tab = _main_freq_tab; main_freq_tab[literal]++; return literal; } @@ -131,12 +141,12 @@ static u32 lzx_record_literal(u8 literal, void *__main_freq_tab) * the frequency of symbols in the main, length, and aligned offset alphabets. * The return value is a 32-bit number that provides the match in an * intermediate representation documented below. */ -static u32 lzx_record_match(unsigned match_offset, unsigned match_len, - void *__freq_tabs, void *__queue) +static u32 +lzx_record_match(unsigned match_offset, unsigned match_len, + void *_freq_tabs, void *_queue) { - struct lzx_freq_tables *freq_tabs = __freq_tabs; - struct lru_queue *queue = __queue; - unsigned formatted_offset; + struct lzx_freq_tables *freq_tabs = _freq_tabs; + struct lru_queue *queue = _queue; unsigned position_slot; unsigned position_footer = 0; u32 match; @@ -150,23 +160,20 @@ static u32 lzx_record_match(unsigned match_offset, unsigned match_len, /* If possible, encode this offset as a repeated offset. */ if (match_offset == queue->R0) { - formatted_offset = 0; - position_slot = 0; + position_slot = 0; } else if (match_offset == queue->R1) { swap(queue->R0, queue->R1); - formatted_offset = 1; - position_slot = 1; + position_slot = 1; } else if (match_offset == queue->R2) { swap(queue->R0, queue->R2); - formatted_offset = 2; - position_slot = 2; + position_slot = 2; } else { /* Not a repeated offset. */ /* offsets of 0, 1, and 2 are reserved for the repeated offset * codes, so non-repeated offsets must be encoded as 3+. The * minimum offset is 1, so encode the offsets offset by 2. */ - formatted_offset = match_offset + LZX_MIN_MATCH; + unsigned formatted_offset = match_offset + LZX_MIN_MATCH; queue->R2 = queue->R1; queue->R1 = queue->R0; @@ -245,8 +252,9 @@ static u32 lzx_record_match(unsigned match_offset, unsigned match_len, * @codes: Pointer to a structure that contains the codewords for the * main, length, and aligned offset Huffman codes. */ -static int lzx_write_match(struct output_bitstream *out, int block_type, - u32 match, const struct lzx_codes *codes) +static int +lzx_write_match(struct output_bitstream *out, int block_type, + u32 match, const struct lzx_codes *codes) { /* low 8 bits are the match length minus 2 */ unsigned match_len_minus_2 = match & 0xff; @@ -351,11 +359,12 @@ static int lzx_write_match(struct output_bitstream *out, int block_type, * @codes: Pointer to a structure that contains the codewords for the * main, length, and aligned offset Huffman codes. */ -static int lzx_write_compressed_literals(struct output_bitstream *ostream, - int block_type, - const u32 match_tab[], - unsigned num_compressed_literals, - const struct lzx_codes *codes) +static int +lzx_write_compressed_literals(struct output_bitstream *ostream, + int block_type, + const u32 match_tab[], + unsigned num_compressed_literals, + const struct lzx_codes *codes) { unsigned i; u32 match; @@ -401,8 +410,9 @@ static int lzx_write_compressed_literals(struct output_bitstream *ostream, * @lens: The code lengths for the Huffman tree, indexed by symbol. * @num_symbols: The number of symbols in the code. */ -static int lzx_write_compressed_tree(struct output_bitstream *out, - const u8 lens[], unsigned num_symbols) +static int +lzx_write_compressed_tree(struct output_bitstream *out, + const u8 lens[], unsigned num_symbols) { /* Frequencies of the length symbols, including the RLE symbols (NOT the * actual lengths themselves). */ @@ -415,7 +425,7 @@ static int lzx_write_compressed_tree(struct output_bitstream *out, unsigned i; unsigned len_in_run; unsigned additional_bits; - char delta; + signed char delta; u8 pretree_sym; ZERO_ARRAY(pretree_freqs); @@ -492,7 +502,7 @@ static int lzx_write_compressed_tree(struct output_bitstream *out, * */ while (cur_run_len >= 4) { additional_bits = (cur_run_len > 4); - delta = -(char)len_in_run; + delta = -(signed char)len_in_run; if (delta < 0) delta += 17; pretree_freqs[19]++; @@ -508,7 +518,7 @@ static int lzx_write_compressed_tree(struct output_bitstream *out, * as a difference from the length of that codeword in the * previous tree. */ while (cur_run_len--) { - delta = -(char)len_in_run; + delta = -(signed char)len_in_run; if (delta < 0) delta += 17; @@ -564,8 +574,9 @@ static int lzx_write_compressed_tree(struct output_bitstream *out, /* Builds the canonical Huffman code for the main tree, the length tree, and the * aligned offset tree. */ -static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs, - struct lzx_codes *codes) +static void +lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs, + struct lzx_codes *codes) { make_canonical_huffman_code(LZX_MAINTREE_NUM_SYMBOLS, LZX_MAX_CODEWORD_LEN, @@ -585,11 +596,12 @@ static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs, codes->aligned_codewords); } -static void do_call_insn_translation(u32 *call_insn_target, int input_pos, - int32_t file_size) +static void +do_call_insn_translation(u32 *call_insn_target, int input_pos, + s32 file_size) { - int32_t abs_offset; - int32_t rel_offset; + s32 abs_offset; + s32 rel_offset; rel_offset = le32_to_cpu(*call_insn_target); if (rel_offset >= -input_pos && rel_offset < file_size) { @@ -606,8 +618,8 @@ static void do_call_insn_translation(u32 *call_insn_target, int input_pos, /* This is the reverse of undo_call_insn_preprocessing() in lzx-decompress.c. * See the comment above that function for more information. */ -static void do_call_insn_preprocessing(u8 uncompressed_data[], - int uncompressed_data_len) +static void +do_call_insn_preprocessing(u8 uncompressed_data[], int uncompressed_data_len) { for (int i = 0; i < uncompressed_data_len - 10; i++) { if (uncompressed_data[i] == 0xe8) { @@ -634,25 +646,10 @@ static const struct lz_params lzx_lz_params = { .too_far = 4096, }; -/* - * Performs LZX compression on a block of data. - * - * @__uncompressed_data: Pointer to the data to be compressed. - * @uncompressed_len: Length, in bytes, of the data to be compressed. - * @compressed_data: Pointer to a location at least (@uncompressed_len - 1) - * bytes long into which the compressed data may be - * written. - * @compressed_len_ret: A pointer to an unsigned int into which the length of - * the compressed data may be returned. - * - * Returns zero if compression was successfully performed. In that case - * @compressed_data and @compressed_len_ret will contain the compressed data and - * its length. A return value of nonzero means that compressing the data did - * not reduce its size, and @compressed_data will not contain the full - * compressed data. - */ -int lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len, - void *compressed_data, unsigned *compressed_len_ret) +/* Documented in wimlib.h */ +WIMLIBAPI unsigned +wimlib_lzx_compress(const void *_uncompressed_data, unsigned uncompressed_len, + void *compressed_data) { struct output_bitstream ostream; u8 uncompressed_data[uncompressed_len + 8]; @@ -666,8 +663,10 @@ int lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len, int ret; int block_type = LZX_BLOCKTYPE_ALIGNED; + wimlib_assert(uncompressed_len <= 32768); + if (uncompressed_len < 100) - return 1; + return 0; memset(&freq_tabs, 0, sizeof(freq_tabs)); queue.R0 = 1; @@ -676,7 +675,8 @@ int lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len, /* The input data must be preprocessed. To avoid changing the original * input, copy it to a temporary buffer. */ - memcpy(uncompressed_data, __uncompressed_data, uncompressed_len); + memcpy(uncompressed_data, _uncompressed_data, uncompressed_len); + memset(uncompressed_data + uncompressed_len, 0, 8); /* Before doing any actual compression, do the call instruction (0xe8 * byte) translation on the uncompressed data. */ @@ -723,55 +723,55 @@ int lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len, * main tree. */ ret = lzx_write_compressed_tree(&ostream, codes.main_lens, LZX_NUM_CHARS); - if (ret != 0) - return ret; + if (ret) + return 0; /* Write the pre-tree and symbols for the rest of the main tree. */ ret = lzx_write_compressed_tree(&ostream, codes.main_lens + LZX_NUM_CHARS, LZX_MAINTREE_NUM_SYMBOLS - LZX_NUM_CHARS); - if (ret != 0) - return ret; + if (ret) + return 0; /* Write the pre-tree and symbols for the length tree. */ ret = lzx_write_compressed_tree(&ostream, codes.len_lens, LZX_LENTREE_NUM_SYMBOLS); - if (ret != 0) - return ret; + if (ret) + return 0; /* Write the compressed literals. */ ret = lzx_write_compressed_literals(&ostream, block_type, match_tab, num_matches, &codes); - if (ret != 0) - return ret; + if (ret) + return 0; ret = flush_output_bitstream(&ostream); - if (ret != 0) - return ret; + if (ret) + return 0; compressed_len = ostream.bit_output - (u8*)compressed_data; - *compressed_len_ret = compressed_len; - #ifdef ENABLE_VERIFY_COMPRESSION /* Verify that we really get the same thing back when decompressing. */ - u8 buf[uncompressed_len]; - ret = lzx_decompress(compressed_data, compressed_len, buf, - uncompressed_len); - if (ret != 0) { - ERROR("lzx_compress(): Failed to decompress data we compressed"); - abort(); - } - - for (i = 0; i < uncompressed_len; i++) { - if (buf[i] != *((u8*)__uncompressed_data + i)) { - ERROR("lzx_compress(): Data we compressed didn't " - "decompress to the original data (difference at " - "byte %u of %u)", i + 1, uncompressed_len); + { + u8 buf[uncompressed_len]; + ret = wimlib_lzx_decompress(compressed_data, compressed_len, + buf, uncompressed_len); + if (ret != 0) { + ERROR("lzx_compress(): Failed to decompress data we compressed"); abort(); } + + for (i = 0; i < uncompressed_len; i++) { + if (buf[i] != *((u8*)_uncompressed_data + i)) { + ERROR("lzx_compress(): Data we compressed didn't " + "decompress to the original data (difference at " + "byte %u of %u)", i + 1, uncompressed_len); + abort(); + } + } } #endif - return 0; + return compressed_len; }