X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzx-decompress.c;h=13b00761eff664eaf4b68b6350c35620c9d12c4e;hp=6e4098fffa1c218aae813ec30f558683be4a5df2;hb=a2993f836221de10e3f3e220d38aa4b72d367a61;hpb=40beb80283a2df7af88c8359ca41adb814585e9a diff --git a/src/lzx-decompress.c b/src/lzx-decompress.c index 6e4098ff..13b00761 100644 --- a/src/lzx-decompress.c +++ b/src/lzx-decompress.c @@ -233,7 +233,7 @@ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[], char value; ret = read_huffsym_using_pretree(istream, pretree_decode_table, - pretree_lens, &tree_code); + pretree_lens, &tree_code); if (ret != 0) return ret; switch (tree_code) { @@ -264,10 +264,10 @@ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[], if (ret != 0) return ret; num_same += 4; - ret = read_huffsym_using_pretree(istream, - pretree_decode_table, - pretree_lens, &code); + pretree_decode_table, + pretree_lens, + &code); if (ret != 0) return ret; value = (char)*lens - (char)code; @@ -313,12 +313,11 @@ static int lzx_read_block_header(struct input_bitstream *istream, struct lru_queue *queue) { int ret; - int block_type; + unsigned block_type; unsigned block_size; - int s; - int i; + unsigned s; + unsigned i; unsigned len; - int32_t R[3]; ret = bitstream_ensure_bits(istream, 4); if (ret != 0) { @@ -335,8 +334,8 @@ static int lzx_read_block_header(struct input_bitstream *istream, * 16 bits, indicated by a 0 bit. */ s = bitstream_read_bits_nocheck(istream, 1); - if (s == 1) { - block_size = 1 << 15; + if (s) { + block_size = 32768; } else { ret = bitstream_read_bits(istream, 16, &block_size); if (ret != 0) @@ -438,24 +437,39 @@ static int lzx_read_block_header(struct input_bitstream *istream, "Huffman tree"); return ret; } - + /* The bitstream of compressed literals and matches for this + * block directly follows and will be read in + * lzx_decompress_block(). */ break; - case LZX_BLOCKTYPE_UNCOMPRESSED: LZX_DEBUG("Found uncompressed block."); - ret = align_input_bitstream(istream, true); - if (ret != 0) - return ret; - ret = bitstream_read_bytes(istream, sizeof(R), R); - if (ret != 0) - return ret; - queue->R0 = le32_to_cpu(R[0]); - queue->R1 = le32_to_cpu(R[1]); - queue->R2 = le32_to_cpu(R[2]); + /* Before reading the three LRU match offsets from the + * uncompressed block header, the stream needs to be aligned on + * a 16-bit boundary. But, unexpectedly, if the stream is + * *already* aligned, the correct thing to do is to throw away + * the next 16 bits. */ + if (istream->bitsleft == 0) { + if (istream->data_bytes_left < 14) + return -1; + istream->data += 2; + istream->data_bytes_left -= 2; + } else { + if (istream->data_bytes_left < 12) + return -1; + istream->bitsleft = 0; + istream->bitbuf = 0; + } + queue->R0 = le32_to_cpu(*(u32*)(istream->data + 0)); + queue->R1 = le32_to_cpu(*(u32*)(istream->data + 4)); + queue->R2 = le32_to_cpu(*(u32*)(istream->data + 8)); + istream->data += 12; + istream->data_bytes_left -= 12; + /* The uncompressed data of this block directly follows and will + * be read in lzx_decompress(). */ break; default: - LZX_DEBUG("Found invalid block."); - return 1; + ERROR("lzx_decompress(): Found invalid block"); + return -1; } *block_type_ret = block_type; *block_size_ret = block_size; @@ -463,32 +477,43 @@ static int lzx_read_block_header(struct input_bitstream *istream, } /* - * Decodes a compressed literal match value. It refers to some match_offset to - * a point earlier in the window, and some match_len, for which the data is to - * be copied to the current position in the window. + * Decodes a compressed match from a block of LZX-compressed data. A match + * refers to some match_offset to a point earlier in the window as well as some + * match_len, for which the data is to be copied to the current position in the + * window. * * @main_element: The start of the match data, as decoded using the main - * tree. - * @block_type: The type of the block (LZX_BLOCKTYPE_ALIGNED or - * LZX_BLOCKTYPE_VERBATIM) + * tree. + * + * @block_type: The type of the block (LZX_BLOCKTYPE_ALIGNED or + * LZX_BLOCKTYPE_VERBATIM) + * * @bytes_remaining: The amount of uncompressed data remaining to be - * uncompressed. It is an error if the match - * is longer than @bytes_remaining. - * @window: A pointer to the window into which the uncompressed + * uncompressed in this block. It is an error if the match + * is longer than this number. + * + * @window: A pointer to the window into which the uncompressed * data is being written. - * @window_pos: The current position in the window. - * @tables: Contains the Huffman tables for the block (main, - * length, and also aligned offset only for - * LZX_BLOCKTYPE_ALIGNED) - * @queue: The least-recently used queue for match offsets. - * @istream: The input bitstream. - * - * Returns the length of the match, or -1 on error (match would exceed - * the amount of data needing to be uncompressed, or match refers to data before - * the window, or the input bitstream ended unexpectedly). + * + * @window_pos: The current byte offset in the window. + * + * @tables: The Huffman decoding tables for this LZX block (main + * code, length code, and for LZX_BLOCKTYPE_ALIGNED blocks, + * also the aligned offset code). + * + * @queue: The least-recently used queue for match offsets. + * + * @istream: The input bitstream. + * + * Returns the length of the match, or a negative number on error. The possible + * error cases are: + * - Match would exceed the amount of data remaining to be uncompressed. + * - Match refers to data before the window. + * - The input bitstream ended unexpectedly. */ -static int lzx_decode_match(int main_element, int block_type, - int bytes_remaining, u8 *window, int window_pos, +static int lzx_decode_match(unsigned main_element, int block_type, + unsigned bytes_remaining, u8 *window, + unsigned window_pos, const struct lzx_tables *tables, struct lru_queue *queue, struct input_bitstream *istream) @@ -501,8 +526,8 @@ static int lzx_decode_match(int main_element, int block_type, unsigned num_extra_bits; unsigned verbatim_bits; unsigned aligned_bits; + unsigned i; int ret; - int i; u8 *match_dest; u8 *match_src; @@ -522,9 +547,9 @@ static int lzx_decode_match(int main_element, int block_type, match_len = LZX_MIN_MATCH + length_header; if (length_header == LZX_NUM_PRIMARY_LENS) { ret = read_huffsym_using_lentree(istream, tables, - &additional_len); + &additional_len); if (ret != 0) - return -1; + return ret; match_len += additional_len; } @@ -554,7 +579,7 @@ static int lzx_decode_match(int main_element, int block_type, * decode the match offset. */ /* Look up the number of extra bits that need to be read. */ - num_extra_bits = lzx_extra_bits[position_slot]; + num_extra_bits = lzx_get_num_extra_bits(position_slot); /* For aligned blocks, if there are at least 3 extra bits, the * actual number of extra bits is 3 less, and they encode a @@ -571,16 +596,16 @@ static int lzx_decode_match(int main_element, int block_type, * num_extra_bits == 3, the assignment to verbatim_bits * will just set it to 0. ) */ ret = bitstream_read_bits(istream, num_extra_bits - 3, - &verbatim_bits); + &verbatim_bits); if (ret != 0) - return -1; + return ret; verbatim_bits <<= 3; ret = read_huffsym_using_alignedtree(istream, tables, &aligned_bits); if (ret != 0) - return -1; + return ret; } else { /* For non-aligned blocks, or for aligned blocks with * less than 3 extra bits, the extra bits are added @@ -589,14 +614,14 @@ static int lzx_decode_match(int main_element, int block_type, ret = bitstream_read_bits(istream, num_extra_bits, &verbatim_bits); if (ret != 0) - return -1; + return ret; aligned_bits = 0; } /* Calculate the match offset. */ - match_offset = lzx_position_base[position_slot] + verbatim_bits + - aligned_bits - 2; + match_offset = lzx_position_base[position_slot] + + verbatim_bits + aligned_bits - 2; /* Update the LRU queue. */ queue->R2 = queue->R1; @@ -612,14 +637,14 @@ static int lzx_decode_match(int main_element, int block_type, match_src = match_dest - match_offset; if (match_len > bytes_remaining) { - ERROR("lzx_decode_match(): Match of length %d bytes overflows " + ERROR("lzx_decode_match(): Match of length %u bytes overflows " "uncompressed block size", match_len); return -1; } if (match_src < window) { - ERROR("lzx_decode_match(): Match of length %d bytes references " - "data before window (match_offset = %d, window_pos = %d)", + ERROR("lzx_decode_match(): Match of length %u bytes references " + "data before window (match_offset = %u, window_pos = %u)", match_len, match_offset, window_pos); return -1; } @@ -643,49 +668,62 @@ static int lzx_decode_match(int main_element, int block_type, return match_len; } - - -/* Undo the 'E8' preprocessing, where the targets of x86 CALL instructions were - * changed from relative offsets to absolute offsets. This type of - * preprocessing can be used on any binary data even if it is not actually - * machine code. It seems to always be used in WIM files, even though there is - * no bit to indicate that it actually is used, unlike in the LZX compressed - * format as used in other file formats, where a bit is reserved for that - * purpose. */ -static void undo_call_insn_preprocessing(u8 uncompressed_data[], - unsigned uncompressed_data_len) +static void undo_call_insn_translation(u32 *call_insn_target, int input_pos, + int32_t file_size) { - int i = 0; - int file_size = LZX_MAGIC_FILESIZE; int32_t abs_offset; int32_t rel_offset; - /* Not enabled in the last 6 bytes, which means the 5-byte call - * instruction cannot start in the last *10* bytes. */ - while (i < uncompressed_data_len - 10) { - if (uncompressed_data[i] != 0xe8) { - i++; - continue; + abs_offset = le32_to_cpu(*call_insn_target); + if (abs_offset >= -input_pos && abs_offset < file_size) { + if (abs_offset >= 0) { + /* "good translation" */ + rel_offset = abs_offset - input_pos; + } else { + /* "compensating translation" */ + rel_offset = abs_offset + file_size; } - abs_offset = le32_to_cpu(*(int32_t*)(uncompressed_data + i + 1)); - - if (abs_offset >= -i && abs_offset < file_size) { - if (abs_offset >= 0) { - /* "good translation" */ - rel_offset = abs_offset - i; - } else { - /* "compensating translation" */ - rel_offset = abs_offset + file_size; - } - *(int32_t*)(uncompressed_data + i + 1) = - cpu_to_le32(rel_offset); + *call_insn_target = cpu_to_le32(rel_offset); + } +} + +/* Undo the 'E8' preprocessing, where the targets of x86 CALL instructions were + * changed from relative offsets to absolute offsets. + * + * Note that this call instruction preprocessing can and will be used on any + * data even if it is not actually x86 machine code. In fact, this type of + * preprocessing appears to always be used in LZX-compressed resources in WIM + * files; there is no bit to indicate whether it is used or not, unlike in the + * LZX compressed format as used in cabinet files, where a bit is reserved for + * that purpose. + * + * Call instruction preprocessing is disabled in the last 6 bytes of the + * uncompressed data, which really means the 5-byte call instruction cannot + * start in the last 10 bytes of the uncompressed data. This is one of the + * errors in the LZX documentation. + * + * Call instruction preprocessing does not appear to be disabled after the + * 32768th chunk of a WIM stream, which is apparently is yet another difference + * from the LZX compression used in cabinet files. + * + * Call instruction processing is supposed to take the file size as a parameter, + * as it is used in calculating the translated jump targets. But in WIM files, + * this file size is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).*/ +static void undo_call_insn_preprocessing(u8 uncompressed_data[], + int uncompressed_data_len) +{ + for (int i = 0; i < uncompressed_data_len - 10; i++) { + if (uncompressed_data[i] == 0xe8) { + undo_call_insn_translation((u32*)&uncompressed_data[i + 1], + i, + LZX_WIM_MAGIC_FILESIZE); + i += 4; } - i += 5; } } /* - * Decompresses a compressed block of data from which the header has already + * Decompresses a LZX-compressed block of data from which the header has already * been read. * * @block_type: The type of the block (LZX_BLOCKTYPE_VERBATIM or @@ -699,20 +737,20 @@ static void undo_call_insn_preprocessing(u8 uncompressed_data[], * @queue: The least-recently-used queue for match offsets. * @istream: The input bitstream for the compressed literals. */ -static int lzx_decompress_block(int block_type, int block_size, u8 *window, - int window_pos, +static int lzx_decompress_block(int block_type, unsigned block_size, + u8 *window, + unsigned window_pos, const struct lzx_tables *tables, struct lru_queue *queue, struct input_bitstream *istream) { - unsigned bytes_remaining; unsigned main_element; - int match_len; + unsigned end; int ret; + int match_len; - bytes_remaining = block_size; - while (bytes_remaining > 0) { - + end = window_pos + block_size; + while (window_pos < end) { ret = read_huffsym_using_maintree(istream, tables, &main_element); if (ret != 0) @@ -720,35 +758,42 @@ static int lzx_decompress_block(int block_type, int block_size, u8 *window, if (main_element < LZX_NUM_CHARS) { /* literal: 0 to LZX_NUM_CHARS - 1 */ - window[window_pos + block_size - bytes_remaining] = - main_element; - bytes_remaining--; + window[window_pos++] = main_element; } else { /* match: LZX_NUM_CHARS to LZX_MAINTREE_NUM_SYMBOLS - 1 */ match_len = lzx_decode_match(main_element, - block_type, bytes_remaining, window, - block_size + window_pos - - bytes_remaining, - tables, queue, istream); - if (match_len == -1) - return 1; - - bytes_remaining -= match_len; + block_type, + end - window_pos, + window, + window_pos, + tables, + queue, + istream); + if (match_len < 0) + return match_len; + window_pos += match_len; } } return 0; } /* - * Decompresses a block of LZX-compressed data using a window size of 32768. + * Decompresses a block of LZX-compressed data as used in the WIM file format. + * + * Note that this will NOT work unmodified for LZX as used in the cabinet + * format, which is not the same as in the WIM format! * * @compressed_data: A pointer to the compressed data. + * * @compressed_len: The length of the compressed data, in bytes. + * * @uncompressed_data: A pointer to the buffer into which to write the - * uncompressed data. - * @uncompressed_len: The length of the uncompressed data. + * uncompressed data. + * + * @uncompressed_len: The length of the uncompressed data. It must be + * 32768 bytes or less. * - * Return non-zero on failure. + * Return 0 on success; non-zero on failure. */ int lzx_decompress(const void *compressed_data, unsigned compressed_len, void *uncompressed_data, unsigned uncompressed_len) @@ -756,10 +801,11 @@ int lzx_decompress(const void *compressed_data, unsigned compressed_len, struct lzx_tables tables; struct input_bitstream istream; struct lru_queue queue; - unsigned bytes_remaining; + unsigned window_pos; unsigned block_size; unsigned block_type; int ret; + bool e8_preprocessing_done; LZX_DEBUG("lzx_decompress (compressed_data = %p, compressed_len = %d, " "uncompressed_data = %p, uncompressed_len = %d).", @@ -773,31 +819,34 @@ int lzx_decompress(const void *compressed_data, unsigned compressed_len, queue.R0 = 1; queue.R1 = 1; queue.R2 = 1; - bytes_remaining = uncompressed_len; - init_input_bitstream(&istream, compressed_data, compressed_len); + e8_preprocessing_done = false; /* Set to true if there may be 0xe8 bytes + in the uncompressed data. */ + /* The compressed data will consist of one or more blocks. The * following loop decompresses one block, and it runs until there all * the compressed data has been decompressed, so there are no more * blocks. */ - while (bytes_remaining != 0) { - + for (window_pos = 0; + window_pos < uncompressed_len; + window_pos += block_size) + { LZX_DEBUG("Reading block header."); ret = lzx_read_block_header(&istream, &block_size, &block_type, &tables, &queue); if (ret != 0) return ret; - LZX_DEBUG("block_size = %u, bytes_remaining = %u", - block_size, bytes_remaining); + LZX_DEBUG("block_size = %u, window_pos = %u", + block_size, window_pos); - if (block_size > bytes_remaining) { + if (block_size > uncompressed_len - window_pos) { ERROR("lzx_decompress(): Expected a block size of at " "most %u bytes (found %u bytes)", - bytes_remaining, block_size); - return 1; + uncompressed_len - window_pos, block_size); + return -1; } switch (block_type) { @@ -807,41 +856,42 @@ int lzx_decompress(const void *compressed_data, unsigned compressed_len, LZX_DEBUG("LZX_BLOCKTYPE_VERBATIM"); else LZX_DEBUG("LZX_BLOCKTYPE_ALIGNED"); - ret = lzx_decompress_block(block_type, block_size, uncompressed_data, - uncompressed_len - - bytes_remaining, - &tables, &queue, &istream); + window_pos, + &tables, + &queue, + &istream); if (ret != 0) return ret; + if (tables.maintree_lens[0xe8] != 0) + e8_preprocessing_done = true; break; case LZX_BLOCKTYPE_UNCOMPRESSED: LZX_DEBUG("LZX_BLOCKTYPE_UNCOMPRESSED"); - ret = bitstream_read_bytes(&istream, block_size, - uncompressed_data + - uncompressed_len - - bytes_remaining); - if (ret != 0) - return ret; - if (block_size & 1) - align_input_bitstream(&istream, false); - break; - default: - wimlib_assert(0); + if (istream.data_bytes_left < block_size) { + ERROR("Unexpected end of input when " + "reading %u bytes from LZX bitstream " + "(only have %u bytes left)", + block_size, istream.data_bytes_left); + return -1; + } + memcpy(&((u8*)uncompressed_data)[window_pos], istream.data, + block_size); + istream.data += block_size; + istream.data_bytes_left -= block_size; + /* Re-align bitstream if an odd number of bytes were + * read. */ + if (istream.data_bytes_left && (block_size & 1)) { + istream.data_bytes_left--; + istream.data++; + } + e8_preprocessing_done = true; break; } - - bytes_remaining -= block_size; - - if (bytes_remaining != 0) - LZX_DEBUG("%d bytes remaining.", bytes_remaining); } - - if (uncompressed_len >= 10) - undo_call_insn_preprocessing(uncompressed_data, - uncompressed_len); - + if (e8_preprocessing_done) + undo_call_insn_preprocessing(uncompressed_data, uncompressed_len); return 0; }