X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Flzx-decomp.c;h=73e68127c0d6fa95bf8c9e62e9e49656dc704ea5;hb=d212325562fd5de72b7c768532bd8c631db897d3;hp=92bfb2e9413413a32a6e297878ec4369a8b23d81;hpb=d0e7f039e4ab206b9fd973c983e3fb841fcd2bf2;p=wimlib diff --git a/src/lzx-decomp.c b/src/lzx-decomp.c index 92bfb2e9..73e68127 100644 --- a/src/lzx-decomp.c +++ b/src/lzx-decomp.c @@ -1,9 +1,9 @@ /* * lzx-decomp.c * - * Routines for LZX decompression. The LZX format has many similarities to the - * DEFLATE format used in zlib and gzip, but it's not quite the same. - * + * LZX decompression routines, originally based on code taken from cabextract + * v0.5, which was, itself, a modified version of the lzx decompression code + * from unlzx. */ /* @@ -26,38 +26,39 @@ */ /* - * This file has been modified from code taken from cabextract v0.5, which was, - * itself, a modified version of the lzx decompression code from unlzx. The - * code has been customized for wimlib. + * LZX is a LZ77 and Huffman-code based compression format that has many + * similarities to the DEFLATE format used in zlib. The compression ratio is as + * good as or better than DEFLATE. However, in WIM files only up to 32768 bytes of + * data can ever be compressed in the same LZX block, so a .tar.gz file could + * potentially be smaller than a WIM file that uses LZX compression because it + * can use a larger LZ77 window size. * * Some notes on the LZX compression format as used in Windows Imaging (WIM) * files: * - * A compressed WIM file resource consists of a table of chunk offsets followed - * by compressed chunks. All compressed chunks except the last decompress to - * WIM_CHUNK_SIZE (= 32768) bytes. This is quite similar to the cabinet (.cab) - * file format, but they are not the same (at least based on M$'s - * documentation). According to the documentation, in the cabinet format, the - * LZX block size is independent from the CFDATA blocks and may span several - * CFDATA blocks. 
However, for WIM file resources, I have seen no case of a LZX - * block spanning multiple WIM chunks. This is probably done to make it easier - * to randomly access the compressed file resources. WIMLIB in fact makes use - * of this feature to allow semi-random access to file resources in the - * read_resource() function. + * A compressed WIM resource consists of a table of chunk offsets followed by + * the compressed chunks themselves. All compressed chunks except possibly the + * last decompress to WIM_CHUNK_SIZE (= 32768) bytes. This is quite similar to + * the cabinet (.cab) file format, but they are not the same. According to the + * cabinet format documentation, the LZX block size is independent from the + * CFDATA blocks, and a LZX block may span several CFDATA blocks. However, in + * WIMs, LZX blocks do not appear to ever span multiple WIM chunks. Note that + * this means any WIM chunk may be decompressed or compressed independently from + * any other chunk, which is convenient. * - * Usually a WIM chunk will contain only one LZX block, but on rare occasions it - * may contain multiple LZX block. The LZX block are usually the aligned block - * type or verbatim block type, but can (very rarely) be the uncompressed block - * type. The size of a LZX block is specified by 1 or 17 bits following the 3 - * bits that specify the block type. A '1' means to use the default block size - * (equal to 32768), while a '0' means that the block size is given by the next - * 16 bits. + * A LZX compressed WIM chunk contains one or more LZX blocks of the aligned, + * verbatim, or uncompressed block types. For aligned and verbatim blocks, the + * size of the block in uncompressed bytes is specified by a bit following the 3 + * bits that specify the block type, possibly followed by an additional 16 bits. 
+ * '1' means to use the default block size (equal to 32768, the size of a WIM + * chunk--- and this seems to only be valid for the first LZX block in a WIM + * chunk), while '0' means that the block size is provided by the next 16 bits. * - * The cabinet format, as documented, allows for the possibility that a CFDATA - * chunk is up to 6144 bytes larger than the uncompressed data. In the WIM - * format, however, it appears that every chunk that would be 32768 bytes or - * more when compressed, is actually stored uncompressed. This is not - * documented by M$. + * The cabinet format, as documented, allows for the possibility that a + * compressed CFDATA chunk is up to 6144 bytes larger than the data it + * uncompresses to. However, in the WIM format it appears that every chunk that + * would be 32768 bytes or more when compressed is actually stored fully + * uncompressed. * * The 'e8' preprocessing step that changes x86 call instructions to use * absolute offsets instead of relative offsets relies on a filesize parameter. @@ -65,11 +66,10 @@ * the file resource could be used for this purpose), and instead a magic file * size of 12000000 is used. The 'e8' preprocessing is always done, and there * is no bit to indicate whether it is done or not. - * */ /* - * Some more notes about errors in Microsoft's documentation: + * Some more notes about errors in Microsoft's LZX documentation: * * Microsoft's LZX document and their implementation of the com.ms.util.cab Java * package do not concur. @@ -108,9 +108,7 @@ #include "util.h" #include "lzx.h" - #include "decomp.h" - #include /* Huffman decoding tables and maps from symbols to code lengths. 
*/ @@ -137,7 +135,7 @@ struct lzx_tables { */ static inline int read_huffsym_using_pretree(struct input_bitstream *istream, const u16 pretree_decode_table[], - const u8 pretree_lens[], uint *n) + const u8 pretree_lens[], unsigned *n) { return read_huffsym(istream, pretree_decode_table, pretree_lens, LZX_PRETREE_NUM_SYMBOLS, LZX_PRETREE_TABLEBITS, n, @@ -147,7 +145,7 @@ static inline int read_huffsym_using_pretree(struct input_bitstream *istream, /* Reads a Huffman-encoded symbol using the main tree. */ static inline int read_huffsym_using_maintree(struct input_bitstream *istream, const struct lzx_tables *tables, - uint *n) + unsigned *n) { return read_huffsym(istream, tables->maintree_decode_table, tables->maintree_lens, LZX_MAINTREE_NUM_SYMBOLS, @@ -157,7 +155,7 @@ static inline int read_huffsym_using_maintree(struct input_bitstream *istream, /* Reads a Huffman-encoded symbol using the length tree. */ static inline int read_huffsym_using_lentree(struct input_bitstream *istream, const struct lzx_tables *tables, - uint *n) + unsigned *n) { return read_huffsym(istream, tables->lentree_decode_table, tables->lentree_lens, LZX_LENTREE_NUM_SYMBOLS, @@ -167,7 +165,7 @@ static inline int read_huffsym_using_lentree(struct input_bitstream *istream, /* Reads a Huffman-encoded symbol using the aligned offset tree. */ static inline int read_huffsym_using_alignedtree(struct input_bitstream *istream, const struct lzx_tables *tables, - uint *n) + unsigned *n) { return read_huffsym(istream, tables->alignedtree_decode_table, tables->alignedtree_lens, @@ -188,14 +186,14 @@ static inline int read_huffsym_using_alignedtree(struct input_bitstream *istream * */ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[], - uint num_lens) + unsigned num_lens) { /* Declare the decoding table and length table for the pretree. 
*/ u16 pretree_decode_table[(1 << LZX_PRETREE_TABLEBITS) + (LZX_PRETREE_NUM_SYMBOLS * 2)]; u8 pretree_lens[LZX_PRETREE_NUM_SYMBOLS]; - uint i; - uint len; + unsigned i; + unsigned len; int ret; /* Read the code lengths of the pretree codes. There are 20 lengths of @@ -228,10 +226,10 @@ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[], * some number of the next lengths are all 0, or some number of * the next lengths are all equal to the next symbol in the * input. */ - uint tree_code; - uint num_zeroes; - uint code; - uint num_same; + unsigned tree_code; + unsigned num_zeroes; + unsigned code; + unsigned num_same; char value; ret = read_huffsym_using_pretree(istream, pretree_decode_table, @@ -309,16 +307,17 @@ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[], * blocks, which contain this information in the header) */ static int lzx_read_block_header(struct input_bitstream *istream, - int *block_size_ret, int *block_type_ret, + unsigned *block_size_ret, + unsigned *block_type_ret, struct lzx_tables *tables, struct lru_queue *queue) { int ret; int block_type; - uint block_size; + unsigned block_size; int s; int i; - uint len; + unsigned len; int32_t R[3]; ret = bitstream_ensure_bits(istream, 4); @@ -494,14 +493,14 @@ static int lzx_decode_match(int main_element, int block_type, struct lru_queue *queue, struct input_bitstream *istream) { - uint length_header; - uint position_slot; - uint match_len; - uint match_offset; - uint additional_len; - uint num_extra_bits; - uint verbatim_bits; - uint aligned_bits; + unsigned length_header; + unsigned position_slot; + unsigned match_len; + unsigned match_offset; + unsigned additional_len; + unsigned num_extra_bits; + unsigned verbatim_bits; + unsigned aligned_bits; int ret; int i; u8 *match_dest; @@ -654,7 +653,7 @@ static int lzx_decode_match(int main_element, int block_type, * format as used in other file formats, where a bit is reserved for that * purpose. 
*/ static void undo_call_insn_preprocessing(u8 uncompressed_data[], - uint uncompressed_data_len) + unsigned uncompressed_data_len) { int i = 0; int file_size = LZX_MAGIC_FILESIZE; @@ -706,8 +705,8 @@ static int lzx_decompress_block(int block_type, int block_size, u8 *window, struct lru_queue *queue, struct input_bitstream *istream) { - uint bytes_remaining; - uint main_element; + unsigned bytes_remaining; + unsigned main_element; int match_len; int ret; @@ -751,16 +750,16 @@ static int lzx_decompress_block(int block_type, int block_size, u8 *window, * * Return non-zero on failure. */ -int lzx_decompress(const void *compressed_data, uint compressed_len, - void *uncompressed_data, uint uncompressed_len) +int lzx_decompress(const void *compressed_data, unsigned compressed_len, + void *uncompressed_data, unsigned uncompressed_len) { - struct lzx_tables tables; - struct input_bitstream istream; - struct lru_queue queue; - uint bytes_remaining; + struct lzx_tables tables; + struct input_bitstream istream; + struct lru_queue queue; + unsigned bytes_remaining; + unsigned block_size; + unsigned block_type; int ret; - int block_size; - int block_type; LZX_DEBUG("lzx_decompress (compressed_data = %p, compressed_len = %d, " "uncompressed_data = %p, uncompressed_len = %d).", @@ -786,17 +785,17 @@ int lzx_decompress(const void *compressed_data, uint compressed_len, while (bytes_remaining != 0) { LZX_DEBUG("Reading block header."); - ret = lzx_read_block_header(&istream, &block_size, &block_type, - &tables, &queue); + ret = lzx_read_block_header(&istream, &block_size, + &block_type, &tables, &queue); if (ret != 0) return ret; - LZX_DEBUG("block_size = %d, bytes_remaining = %d.", + LZX_DEBUG("block_size = %u, bytes_remaining = %u", block_size, bytes_remaining); if (block_size > bytes_remaining) { ERROR("lzx_decompress(): Expected a block size of at " - "most %d bytes (found %d bytes)", + "most %u bytes (found %u bytes)", bytes_remaining, block_size); return 1; } @@ -838,7 +837,6 
@@ int lzx_decompress(const void *compressed_data, uint compressed_len, if (bytes_remaining != 0) LZX_DEBUG("%d bytes remaining.", bytes_remaining); - } if (uncompressed_len >= 10)