X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzx-compress.c;h=01d3caaaa757de0d6dd47f8c5d471eed7053f48e;hp=b18004684658addcad9575df430bb4332af71bfd;hb=c9a22e5ce23d74d42e8dfd00c21928a5fb807fa7;hpb=40beb80283a2df7af88c8359ca41adb814585e9a diff --git a/src/lzx-compress.c b/src/lzx-compress.c index b1800468..01d3caaa 100644 --- a/src/lzx-compress.c +++ b/src/lzx-compress.c @@ -77,9 +77,9 @@ struct lzx_codes { }; struct lzx_freq_tables { - u32 main_freq_table[LZX_MAINTREE_NUM_SYMBOLS]; - u32 len_freq_table[LZX_LENTREE_NUM_SYMBOLS]; - u32 aligned_freq_table[LZX_ALIGNEDTREE_NUM_SYMBOLS]; + freq_t main_freq_table[LZX_MAINTREE_NUM_SYMBOLS]; + freq_t len_freq_table[LZX_LENTREE_NUM_SYMBOLS]; + freq_t aligned_freq_table[LZX_ALIGNEDTREE_NUM_SYMBOLS]; }; /* Returns the LZX position slot that corresponds to a given formatted offset. @@ -122,22 +122,11 @@ static inline unsigned lzx_get_position_slot(unsigned formatted_offset) static u32 lzx_record_literal(u8 literal, void *__main_freq_tab) { - u32 *main_freq_tab = __main_freq_tab; + freq_t *main_freq_tab = __main_freq_tab; main_freq_tab[literal]++; return literal; } -/* Equivalent to lzx_extra_bits[position_slot] except position_slot must be - * between 2 and 37 */ -static inline unsigned lzx_get_num_extra_bits(unsigned position_slot) -{ -#if 0 - return lzx_extra_bits[position_slot]; -#endif - wimlib_assert(position_slot >= 2 && position_slot <= 37); - return (position_slot >> 1) - 1; -} - /* Constructs a match from an offset and a length, and updates the LRU queue and * the frequency of symbols in the main, length, and aligned offset alphabets. * The return value is a 32-bit number that provides the match in an @@ -147,7 +136,6 @@ static u32 lzx_record_match(unsigned match_offset, unsigned match_len, { struct lzx_freq_tables *freq_tabs = __freq_tabs; struct lru_queue *queue = __queue; - unsigned formatted_offset; unsigned position_slot; unsigned position_footer = 0; u32 match; @@ -161,23 +149,20 @@ static u32 lzx_record_match(unsigned match_offset, unsigned match_len, /* If possible, encode this offset as a repeated offset. */ if (match_offset == queue->R0) { - formatted_offset = 0; - position_slot = 0; + position_slot = 0; } else if (match_offset == queue->R1) { swap(queue->R0, queue->R1); - formatted_offset = 1; - position_slot = 1; + position_slot = 1; } else if (match_offset == queue->R2) { swap(queue->R0, queue->R2); - formatted_offset = 2; - position_slot = 2; + position_slot = 2; } else { /* Not a repeated offset. */ /* offsets of 0, 1, and 2 are reserved for the repeated offset * codes, so non-repeated offsets must be encoded as 3+. The * minimum offset is 1, so encode the offsets offset by 2. */ - formatted_offset = match_offset + LZX_MIN_MATCH; + unsigned formatted_offset = match_offset + LZX_MIN_MATCH; queue->R2 = queue->R1; queue->R1 = queue->R0; @@ -320,7 +305,7 @@ static int lzx_write_match(struct output_bitstream *out, int block_type, wimlib_assert(position_slot < LZX_NUM_POSITION_SLOTS); - num_extra_bits = lzx_extra_bits[position_slot]; + num_extra_bits = lzx_get_num_extra_bits(position_slot); /* For aligned offset blocks with at least 3 extra bits, output the * verbatim bits literally, then the aligned bits encoded using the @@ -417,7 +402,7 @@ static int lzx_write_compressed_tree(struct output_bitstream *out, { /* Frequencies of the length symbols, including the RLE symbols (NOT the * actual lengths themselves). */ - unsigned pretree_freqs[LZX_PRETREE_NUM_SYMBOLS]; + freq_t pretree_freqs[LZX_PRETREE_NUM_SYMBOLS]; u8 pretree_lens[LZX_PRETREE_NUM_SYMBOLS]; u16 pretree_codewords[LZX_PRETREE_NUM_SYMBOLS]; u8 output_syms[num_symbols * 2]; @@ -596,41 +581,37 @@ static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs, codes->aligned_codewords); } -/* Do the 'E8' preprocessing, where the targets of x86 CALL instructions were - * changed from relative offsets to absolute offsets. This type of - * preprocessing can be used on any binary data even if it is not actually - * machine code. It seems to always be used in WIM files, even though there is - * no bit to indicate that it actually is used, unlike in the LZX compressed - * format as used in other file formats such as the cabinet format, where a bit - * is reserved for that purpose. */ -static void do_call_insn_preprocessing(u8 uncompressed_data[], - unsigned uncompressed_data_len) +static void do_call_insn_translation(u32 *call_insn_target, int input_pos, + int32_t file_size) { - int i = 0; - int file_size = LZX_MAGIC_FILESIZE; - int32_t rel_offset; int32_t abs_offset; + int32_t rel_offset; - /* Not enabled in the last 6 bytes, which means the 5-byte call - * instruction cannot start in the last *10* bytes. */ - while (i < uncompressed_data_len - 10) { - if (uncompressed_data[i] != 0xe8) { - i++; - continue; + rel_offset = le32_to_cpu(*call_insn_target); + if (rel_offset >= -input_pos && rel_offset < file_size) { + if (rel_offset < file_size - input_pos) { + /* "good translation" */ + abs_offset = rel_offset + input_pos; + } else { + /* "compensating translation" */ + abs_offset = rel_offset - file_size; } - rel_offset = le32_to_cpu(*(int32_t*)(uncompressed_data + i + 1)); - - if (rel_offset >= -i && rel_offset < file_size) { - if (rel_offset < file_size - i) { - /* "good translation" */ - abs_offset = rel_offset + i; - } else { - /* "compensating translation" */ - abs_offset = rel_offset - file_size; - } - *(int32_t*)(uncompressed_data + i + 1) = cpu_to_le32(abs_offset); + *call_insn_target = cpu_to_le32(abs_offset); + } +} + +/* This is the reverse of undo_call_insn_preprocessing() in lzx-decompress.c. + * See the comment above that function for more information. */ +static void do_call_insn_preprocessing(u8 uncompressed_data[], + int uncompressed_data_len) +{ + for (int i = 0; i < uncompressed_data_len - 10; i++) { + if (uncompressed_data[i] == 0xe8) { + do_call_insn_translation((u32*)&uncompressed_data[i + 1], + i, + LZX_WIM_MAGIC_FILESIZE); + i += 4; } - i += 5; } } @@ -670,7 +651,7 @@ int lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len, void *compressed_data, unsigned *compressed_len_ret) { struct output_bitstream ostream; - u8 uncompressed_data[uncompressed_len + LZX_MAX_MATCH]; + u8 uncompressed_data[uncompressed_len + 8]; struct lzx_freq_tables freq_tabs; struct lzx_codes codes; u32 match_tab[uncompressed_len];