X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Flzms-decompress.c;h=7254091449c052442bae510444f93d7f92b9a9d0;hb=d20508c65ee45d265f48964ba312f10f8c67a9c8;hp=532dfb59c9393f3b486c4c3661e07af283e6c951;hpb=543d8a6b89049aff65fa7eabf5f4b376a196c8d2;p=wimlib diff --git a/src/lzms-decompress.c b/src/lzms-decompress.c index 532dfb59..72540914 100644 --- a/src/lzms-decompress.c +++ b/src/lzms-decompress.c @@ -160,18 +160,28 @@ * have equal frequency. Following that, each code must be rebuilt whenever a * certain number of symbols has been decoded with it. * - * In general, multiple valid Huffman codes can be constructed from a set of - * symbol frequencies. Like other compression formats such as XPRESS, LZX, and - * DEFLATE, the LZMS format solves this ambiguity by requiring that all Huffman - * codes be constructed in canonical form. This form requires that same-length - * codewords be lexicographically ordered the same way as the corresponding - * symbols and that all shorter codewords lexicographically precede longer - * codewords. + * Like other compression formats such as XPRESS, LZX, and DEFLATE, the LZMS + * format requires that all Huffman codes be constructed in canonical form. + * This form requires that same-length codewords be lexicographically ordered + * the same way as the corresponding symbols and that all shorter codewords + * lexicographically precede longer codewords. Such a code can be constructed + * directly from codeword lengths, although in LZMS this is not actually + * necessary because the codes are built using adaptive symbol frequencies. * - * Codewords in all the LZMS Huffman codes are limited to 15 bits. If the - * canonical code for a given set of symbol frequencies has any codewords longer - * than 15 bits, then all frequencies must be divided by 2, rounding up, and the - * code construction must be attempted again. + * Even with the canonical code restriction, the same frequencies can be used to + * construct multiple valid Huffman codes. Therefore, the decompressor needs to + * construct the right one. Specifically, the LZMS format requires that the + * Huffman code be constructed as if the well-known priority queue algorithm is + * used and frequency ties are always broken in favor of leaf nodes. See + * make_canonical_huffman_code() in compress_common.c for more information. + * + * Codewords in LZMS are guaranteed to not exceed 15 bits. The format otherwise + * places no restrictions on codeword length. Therefore, the Huffman code + * construction algorithm that a correct LZMS decompressor uses need not + * implement length-limited code construction. But if it does (e.g. by virtue + * of being shared among multiple compression algorithms), the details of how it + * does so are unimportant, provided that the maximum codeword length parameter + * is set to at least 15 bits. * * An LZMS-compressed block seemingly cannot have a compressed size greater than * or equal to the uncompressed size. In such cases the block must be stored @@ -190,10 +200,10 @@ # include "config.h" #endif -#include "wimlib.h" #include "wimlib/compress_common.h" #include "wimlib/decompressor_ops.h" #include "wimlib/decompress_common.h" +#include "wimlib/error.h" #include "wimlib/lzms.h" #include "wimlib/util.h" @@ -1002,16 +1012,6 @@ lzms_decompress(const void *compressed_data, size_t compressed_size, if (uncompressed_size == 0) return 0; - /* The x86 post-processor requires that the uncompressed length fit into - * a signed 32-bit integer. Also, the position slot table cannot be - * searched for a position of INT32_MAX or greater. */ - if (uncompressed_size >= INT32_MAX) { - LZMS_DEBUG("Uncompressed length too large " - "(got %zu, expected < INT32_MAX)", - uncompressed_size); - return -1; - } - /* Decode the literals and matches. */ if (lzms_decode_items(compressed_data, compressed_size, uncompressed_data, uncompressed_size, ctx)) @@ -1034,12 +1034,16 @@ lzms_free_decompressor(void *_ctx) } static int -lzms_create_decompressor(size_t max_block_size, - const struct wimlib_decompressor_params_header *params, - void **ctx_ret) +lzms_create_decompressor(size_t max_block_size, void **ctx_ret) { struct lzms_decompressor *ctx; + /* The x86 post-processor requires that the uncompressed length fit into + * a signed 32-bit integer. Also, the position slot table cannot be + * searched for a position of INT32_MAX or greater. */ + if (max_block_size >= INT32_MAX) + return WIMLIB_ERR_INVALID_PARAM; + ctx = ALIGNED_MALLOC(sizeof(struct lzms_decompressor), DECODE_TABLE_ALIGNMENT); if (ctx == NULL)