From: Eric Biggers Date: Wed, 25 Dec 2013 04:53:18 +0000 (-0600) Subject: New compression/decompression API X-Git-Tag: v1.6.0~108 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=883833a4b3dabec325edf1ca938000f91d587c00 New compression/decompression API To avoid the proliferation of functions for compressing and decompressing in different formats, allow all the compression algorithms to be accessed using a single API: Compression: - wimlib_create_compressor() - wimlib_compress() - wimlib_free_compressor() - wimlib_set_default_compressor_params() Decompression: - wimlib_create_decompressor() - wimlib_decompress() - wimlib_free_decompressor() - wimlib_set_default_decompressor_params() This also makes it easier to allocate larger blocks of memory or do other initializations in any decompressor or compressor implementation. This commit adds a skeleton for the LZMS compressor but it doesn't do anything yet. --- diff --git a/Makefile.am b/Makefile.am index 7b20b58a..4ebcf64b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -21,10 +21,11 @@ libwim_la_SOURCES = \ src/add_image.c \ src/capture_common.c \ src/compress.c \ - src/compress_chunk.c \ + src/compress_common.c \ src/compress_parallel.c \ src/compress_serial.c \ src/decompress.c \ + src/decompress_common.c \ src/delete_image.c \ src/dentry.c \ src/encoding.c \ @@ -36,6 +37,7 @@ libwim_la_SOURCES = \ src/integrity.c \ src/join.c \ src/lookup_table.c \ + src/lzms-compress.c \ src/lzms-decompress.c \ src/lz77.c \ src/divsufsort/divsufsort.c \ @@ -69,9 +71,11 @@ libwim_la_SOURCES = \ include/wimlib/callback.h \ include/wimlib/capture.h \ include/wimlib/compiler.h \ - include/wimlib/compress.h \ - include/wimlib/compress_chunks.h \ - include/wimlib/decompress.h \ + include/wimlib/compressor_ops.h \ + include/wimlib/compress_common.h \ + include/wimlib/chunk_compressor.h \ + include/wimlib/decompressor_ops.h \ + include/wimlib/decompress_common.h \ include/wimlib/dentry.h \ include/wimlib/encoding.h \ include/wimlib/endianness.h \ diff --git a/include/wimlib.h b/include/wimlib.h index afed2af1..e5efcdfa 100644 --- a/include/wimlib.h +++ b/include/wimlib.h @@ -330,12 +330,6 @@ * specialized function (wimlib_split()) is needed to create a split WIM. */ -/** @defgroup G_compression Compression and decompression functions - * - * @brief Functions for LZX and XPRESS compression and decompression, exported - * for convenience only. These functions normally do not need to be used. - */ - #ifndef _WIMLIB_H #define _WIMLIB_H @@ -1717,97 +1711,6 @@ struct wimlib_extract_command { int extract_flags; }; -/** @} */ -/** @ingroup G_compression - * @{ */ - -/** LZX compression parameters to pass to wimlib_lzx_alloc_context(). */ -struct wimlib_lzx_params { - /** Must be set to the size of this structure, in bytes. */ - uint32_t size_of_this; - - /** Relatively fast LZX compression algorithm with a decent compression - * ratio; the suggested default. */ -#define WIMLIB_LZX_ALGORITHM_FAST 0 - - /** Slower LZX compression algorithm that provides a better compression - * ratio. */ -#define WIMLIB_LZX_ALGORITHM_SLOW 1 - - /** Algorithm to use to perform the compression: either - * ::WIMLIB_LZX_ALGORITHM_FAST or ::WIMLIB_LZX_ALGORITHM_SLOW. The - * format is still LZX; this refers to the method the code will use to - * perform LZX-compatible compression. */ - uint32_t algorithm : 3; - - /** If set to 1, the default parameters for the specified algorithm are - * used rather than the ones specified in the following union. */ - uint32_t use_defaults : 1; - - union { - /** Parameters for the fast algorithm. */ - struct wimlib_lzx_fast_params { - uint32_t fast_reserved1[10]; - } fast; - - /** Parameters for the slow algorithm. */ - struct wimlib_lzx_slow_params { - /** If set to 1, the compressor can output length 2 - * matches. If set 0, the compressor only outputs - * matches of length 3 or greater. Suggested value: 1 - */ - uint32_t use_len2_matches : 1; - - uint32_t slow_reserved1 : 31; - - /** Matches with length (in bytes) longer than this - * value are immediately taken without spending time on - * minimum-cost measurements. Suggested value: 32. */ - uint32_t num_fast_bytes; - - /** Number of passes to compute a match/literal sequence - * for each LZX block. This is for an iterative - * algorithm that attempts to minimize the cost of the - * match/literal sequence by using a cost model provided - * by the previous iteration. Must be at least 1. - * Suggested value: 2. */ - uint32_t num_optim_passes; - - /** Reserved; set to 0. */ - uint32_t slow_reserved_blocksplit; - - /** Maximum depth to search for matches at each - * position. Suggested value: 50. */ - uint32_t max_search_depth; - - /** Maximum number of potentially good matches to - * consider for each position. Suggested value: 3. */ - uint32_t max_matches_per_pos; - - uint32_t slow_reserved2[2]; - - /** Assumed cost of a main symbol with zero frequency. - * Must be at least 1 and no more than 16. Suggested - * value: 15. */ - uint8_t main_nostat_cost; - - /** Assumed cost of a length symbol with zero frequency. - * Must be at least 1 and no more than 16. Suggested - * value: 15. */ - uint8_t len_nostat_cost; - - /** Assumed cost of an aligned symbol with zero - * frequency. Must be at least 1 and no more than 8. - * Suggested value: 7. */ - uint8_t aligned_nostat_cost; - - uint8_t slow_reserved3[5]; - } slow; - } alg_params; -}; - -/** Opaque LZX compression context. */ -struct wimlib_lzx_context; /** @} */ /** @ingroup G_general @@ -2787,198 +2690,6 @@ wimlib_join(const wimlib_tchar * const *swms, int wim_write_flags, wimlib_progress_func_t progress_func); -/** - * @ingroup G_compression - * - * Decompresses a block of LZMS-compressed data. - * - * This function is exported for convenience only and should only be used by - * library clients looking to make use of wimlib's compression code for another - * purpose. - * - * This decompressor only implements "raw" decompression, which decompresses a - * single LZMS-compressed block. This behavior is the same as that of - * Decompress() in the Windows 8 compression API when using a compression handle - * created with CreateDecompressor() with the Algorithm parameter specified as - * COMPRESS_ALGORITHM_LZMS | COMPRESS_RAW. Presumably, non-raw LZMS data - * is a container format from which the locations and sizes (both compressed and - * uncompressed) of the constituent blocks can be determined. - * - * This function should not be called for blocks with compressed size equal to - * uncompressed size, since such blocks are actually stored uncompressed. - * - * @param compressed_data - * Pointer to the compressed data. - * - * @param compressed_len - * Length of the compressed data, in bytes. - * - * @param uncompressed_data - * Pointer to the buffer into which to write the uncompressed data. - * - * @param uncompressed_len - * Length of the uncompressed data. - * - * @return - * 0 on success; non-zero on failure. - */ -extern int -wimlib_lzms_decompress(const void *compressed_data, unsigned compressed_len, - void *uncompressed_data, unsigned uncompressed_len); - -/** - * @ingroup G_compression - * - * Compress a chunk of a WIM resource using LZX compression. - * - * This function is exported for convenience only and should only be used by - * library clients looking to make use of wimlib's compression code for another - * purpose. - * - * @param chunk - * Uncompressed data of the chunk. - * @param chunk_size - * Size of the uncompressed chunk, in bytes. - * @param out - * Pointer to output buffer of size at least (@p chunk_size - 1) bytes. - * - * @return - * The size of the compressed data written to @p out in bytes, or 0 if the - * data could not be compressed to (@p chunk_size - 1) bytes or fewer. - * - * As a special requirement, the compression code is optimized for the WIM - * format and therefore requires (@p chunk_size <= 32768). - */ -extern unsigned -wimlib_lzx_compress(const void *chunk, unsigned chunk_size, void *out) - _wimlib_deprecated; - -/** - * @ingroup G_compression - * - * Equivalent to wimlib_lzx_compress(), but uses the specified compression - * context, allocated by wimlib_lzx_alloc_context(). - */ -extern unsigned -wimlib_lzx_compress2(const void *chunk, unsigned chunk_size, void *out, - struct wimlib_lzx_context *ctx); - -/** - * @ingroup G_compression - * - * Allocate a LZX compression context using the specified parameters. - * - * This function is exported for convenience only and should only be used by - * library clients looking to make use of wimlib's compression code for another - * purpose. - * - * @param window_size - * Size of the LZX window. Must be a power of 2 between 2^15 and 2^21, - * inclusively. - * - * @param params - * Compression parameters to use, or @c NULL to use the default parameters. - * - * @param ctx_ret - * A pointer to either @c NULL or an existing ::wimlib_lzx_context. If - * *ctx_ret == NULL, the new context is allocated. If - * *ctx_ret != NULL, the existing context is re-used if - * possible. Alternatively, this argument can itself be @c NULL to - * indicate that only parameter validation is to be performed. - * - * @return 0 on success; nonzero on error. - * - * @retval ::WIMLIB_ERR_INVALID_PARAM - * The window size or compression parameters were invalid. - * @retval ::WIMLIB_ERR_NOMEM - * Not enough memory to allocate the compression context. - */ -extern int -wimlib_lzx_alloc_context(uint32_t window_size, - const struct wimlib_lzx_params *params, - struct wimlib_lzx_context **ctx_pp); - -/** - * @ingroup G_compression - * - * Decompresses a block of LZX-compressed data as used in the WIM file format. - * - * Note that this will NOT work unmodified for LZX as used in the cabinet - * format, which is not the same as in the WIM format! - * - * This function is exported for convenience only and should only be used by - * library clients looking to make use of wimlib's compression code for another - * purpose. - * - * @param compressed_data - * Pointer to the compressed data. - * - * @param compressed_len - * Length of the compressed data, in bytes. - * - * @param uncompressed_data - * Pointer to the buffer into which to write the uncompressed data. - * - * @param uncompressed_len - * Length of the uncompressed data. It must be 32768 bytes or less. - * - * @return - * 0 on success; non-zero on failure. - */ -extern int -wimlib_lzx_decompress(const void *compressed_data, unsigned compressed_len, - void *uncompressed_data, unsigned uncompressed_len); - -/** - * @ingroup G_compression - * - * Equivalent to wimlib_lzx_decompress(), except the window size is specified in - * @p max_window_size as any power of 2 between 2^15 and 2^21, inclusively, and - * @p uncompressed_len may be any size less than or equal to @p max_window_size. - */ -extern int -wimlib_lzx_decompress2(const void *compressed_data, unsigned compressed_len, - void *uncompressed_data, unsigned uncompressed_len, - uint32_t max_window_size); - -/** - * @ingroup G_compression - * - * Free the specified LZX compression context, allocated with - * wimlib_lzx_alloc_context(). - */ -extern void -wimlib_lzx_free_context(struct wimlib_lzx_context *ctx); - -/** - * @ingroup G_compression - * - * Set the global default LZX compression parameters. - * - * @param params - * The LZX compression parameters to set. These default parameters will be - * used by any calls to wimlib_lzx_alloc_context() with @c NULL LZX - * parameters specified, as well as by any future compression performed by - * the library itself. Passing @p NULL here resets the default LZX - * parameters to their original value. - * - * @return 0 on success; nonzero on error. - * - * @retval ::WIMLIB_ERR_INVALID_PARAM - * The compression parameters were invalid. - */ -extern int -wimlib_lzx_set_default_params(const struct wimlib_lzx_params *params); - -/** - * @ingroup G_compression - * - * Free the specified LZX compression context, allocated with - * wimlib_lzx_alloc_context(). - */ -extern void -wimlib_lzx_free_context(struct wimlib_lzx_context *ctx); - /** * @ingroup G_mounting_wim_images @@ -4032,62 +3743,322 @@ wimlib_write_to_fd(WIMStruct *wim, unsigned num_threads, wimlib_progress_func_t progress_func); +/** + * @defgroup G_compression Compression and decompression functions + * + * @brief Functions for LZX, XPRESS, and LZMS compression and decompression, + * exported for convenience only, as they are already used by wimlib internally + * when appropriate. + * + * These functions can be used for general-purpose lossless data compression, + * but some limitations apply; for example, none of the compressors or + * decompressors currently support sliding windows, and there also exist + * slightly different variants of these formats that are not supported + * unmodified. + * + * @{ + */ + +/** Header for compression parameters to pass to wimlib_create_compressor() or + * wimlib_set_default_compressor_params(). */ +struct wimlib_compressor_params_header { + /** Size of the parameters, in bytes. */ + uint32_t size; + + uint32_t reserved; +}; + +/** Header for decompression parameters to pass to wimlib_create_decompressor() + * or wimlib_set_default_decompressor_params() */ +struct wimlib_decompressor_params_header { + /** Size of the parameters, in bytes. */ + uint32_t size; + + uint32_t reserved; +}; + +/** LZX compression parameters that can optionally be passed to + * wimlib_create_compressor() with the compression type + * ::WIMLIB_COMPRESSION_TYPE_LZX. */ +struct wimlib_lzx_compressor_params { + /** hdr.size Must be set to the size of this structure, in bytes. */ + struct wimlib_compressor_params_header hdr; + + /** Relatively fast LZX compression algorithm with a decent compression + * ratio; the suggested default. */ +#define WIMLIB_LZX_ALGORITHM_FAST 0 + + /** Slower LZX compression algorithm that provides a better compression + * ratio. */ +#define WIMLIB_LZX_ALGORITHM_SLOW 1 + + /** Algorithm to use to perform the compression: either + * ::WIMLIB_LZX_ALGORITHM_FAST or ::WIMLIB_LZX_ALGORITHM_SLOW. The + * format is still LZX; this refers to the method the code will use to + * perform LZX-compatible compression. */ + uint32_t algorithm : 3; + + /** If set to 1, the default parameters for the specified algorithm are + * used rather than the ones specified in the following union. */ + uint32_t use_defaults : 1; + + union { + /** Parameters for the fast algorithm. */ + struct wimlib_lzx_fast_params { + uint32_t fast_reserved1[10]; + } fast; + + /** Parameters for the slow algorithm. */ + struct wimlib_lzx_slow_params { + /** If set to 1, the compressor can output length 2 + * matches. If set 0, the compressor only outputs + * matches of length 3 or greater. Suggested value: 1 + */ + uint32_t use_len2_matches : 1; + + uint32_t slow_reserved1 : 31; + + /** Matches with length (in bytes) longer than this + * value are immediately taken without spending time on + * minimum-cost measurements. Suggested value: 32. */ + uint32_t num_fast_bytes; + + /** Number of passes to compute a match/literal sequence + * for each LZX block. This is for an iterative + * algorithm that attempts to minimize the cost of the + * match/literal sequence by using a cost model provided + * by the previous iteration. Must be at least 1. + * Suggested value: 2. */ + uint32_t num_optim_passes; + + /** Reserved; set to 0. */ + uint32_t slow_reserved_blocksplit; + + /** Maximum depth to search for matches at each + * position. Suggested value: 50. */ + uint32_t max_search_depth; + + /** Maximum number of potentially good matches to + * consider for each position. Suggested value: 3. */ + uint32_t max_matches_per_pos; + + uint32_t slow_reserved2[2]; + + /** Assumed cost of a main symbol with zero frequency. + * Must be at least 1 and no more than 16. Suggested + * value: 15. */ + uint8_t main_nostat_cost; + + /** Assumed cost of a length symbol with zero frequency. + * Must be at least 1 and no more than 16. Suggested + * value: 15. */ + uint8_t len_nostat_cost; + + /** Assumed cost of an aligned symbol with zero + * frequency. Must be at least 1 and no more than 8. + * Suggested value: 7. */ + uint8_t aligned_nostat_cost; + + uint8_t slow_reserved3[5]; + } slow; + } alg_params; +}; + +/** Opaque compressor handle. */ +struct wimlib_compressor; + +/** Opaque decompressor handle. */ +struct wimlib_decompressor; + +/** + * @ingroup G_compression + * + * Set the default compression parameters for the specified compression type. + * This will affect both explicit and wimlib-internal calls to + * wimlib_create_compressor(). + * + * @param ctype + * Compression type for which to set the default compression parameters. + * @param params + * Compression-type specific parameters. This may be @ NULL, in which case + * the "default default" parameters are restored. + * + * @return 0 on success; nonzero on error. + * + * @retval ::WIMLIB_ERR_INVALID_COMPRESSION_TYPE + * @p ctype was not a supported compression type. + */ +extern int +wimlib_set_default_compressor_params(enum wimlib_compression_type ctype, + const struct wimlib_compressor_params_header *params); + /** * @ingroup G_compression * - * Compress a chunk of data using XPRESS compression. + * Allocate a compressor for the specified compression type using the specified + * parameters. * - * This function is exported for convenience only and should only be used by - * library clients looking to make use of wimlib's compression code for another - * purpose. + * @param ctype + * Compression type for which to create the compressor. + * @param max_block_size + * Maximum block size to support. The exact meaning and allowed values for + * this parameter depend on the compression type, but it at least specifies + * the maximum allowed value for @p uncompressed_size to wimlib_compress(). + * @param extra_params + * An optional pointer to extra compressor parameters for the specified + * compression type. For LZX, a pointer to ::wimlib_lzx_compressor_params + * may be specified here. If left @c NULL, the default parameters are + * used. + * @param compressor_ret + * A location into which to return the pointer to the allocated compressor, + * which can be used for any number of calls to wimlib_compress() before + * being freed with wimlib_free_compressor(). + * + * @return 0 on success; nonzero on error. + * + * @retval ::WIMLIB_ERR_INVALID_COMPRESSION_TYPE + * @p ctype was not a supported compression type. + * @retval ::WIMLIB_ERR_INVALID_PARAM + * The compression parameters were invalid. + * @retval ::WIMLIB_ERR_NOMEM + * Insufficient memory to allocate the compressor. + */ +extern int +wimlib_create_compressor(enum wimlib_compression_type ctype, + size_t max_block_size, + const struct wimlib_compressor_params_header *extra_params, + struct wimlib_compressor **compressor_ret); + +/** + * @ingroup G_compression * - * As of wimlib v1.6.0, this function can be used with @p chunk_size greater - * than 32768 bytes and is only limited by available memory. However, the - * XPRESS format itself still caps match offsets to 65535, so if a larger chunk - * size is chosen, then the matching will effectively occur in a sliding window - * over it. + * Losslessly compress a block of data using a compressor previously created + * with wimlib_create_compressor(). * - * @param chunk - * Uncompressed data of the chunk. - * @param chunk_size - * Size of the uncompressed chunk, in bytes. - * @param out - * Pointer to output buffer of size at least (@p chunk_size - 1) bytes. + * @param uncompressed_data + * Buffer containing the data to compress. + * @param uncompressed_size + * Size, in bytes, of the data to compress. + * @param compressed_data + * Buffer into which to write the compressed data. + * @param compressed_size_avail + * Number of bytes available in @compressed_data. + * @param compressor + * A compressor previously allocated with wimlib_create_compressor(). * * @return - * The size of the compressed data written to @p out in bytes, or 0 if the - * data could not be compressed to (@p chunk_size - 1) bytes or fewer. + * The size of the compressed data, in bytes, or 0 if the input data could + * not be compressed to @compressed_size_avail or fewer bytes. */ -extern unsigned -wimlib_xpress_compress(const void *chunk, unsigned chunk_size, void *out); +extern size_t +wimlib_compress(const void *uncompressed_data, size_t uncompressed_size, + void *compressed_data, size_t compressed_size_avail, + struct wimlib_compressor *compressor); /** * @ingroup G_compression * - * Decompresses a chunk of XPRESS-compressed data. + * Free a compressor previously allocated with wimlib_create_compressor(). * - * This function is exported for convenience only and should only be used by - * library clients looking to make use of wimlib's compression code for another - * purpose. + * @param compressor + * The compressor to free. + */ +extern void +wimlib_free_compressor(struct wimlib_compressor *compressor); + +/** + * @ingroup G_compression * - * @param compressed_data - * Pointer to the compressed data. + * Set the default decompression parameters for the specified compression type. + * This will affect both explicit and wimlib-internal calls to + * wimlib_create_decompressor(). * - * @param compressed_len - * Length of the compressed data, in bytes. + * @param ctype + * Compression type for which to set the default decompression parameters. + * @param params + * Compression-type specific parameters. This may be @ NULL, in which case + * the "default default" parameters are restored. * - * @param uncompressed_data - * Pointer to the buffer into which to write the uncompressed data. + * @return 0 on success; nonzero on error. + * + * @retval ::WIMLIB_ERR_INVALID_COMPRESSION_TYPE + * @p ctype was not a supported compression type. + */ +extern int +wimlib_set_default_decompressor_params(enum wimlib_compression_type ctype, + const struct wimlib_decompressor_params_header *params); + +/** + * @ingroup G_compression * - * @param uncompressed_len - * Length of the uncompressed data. + * Allocate a decompressor for the specified compression type using the + * specified parameters. * - * @return - * 0 on success; non-zero on failure. + * @param ctype + * Compression type for which to create the decompressor. + * @param max_block_size + * Maximum block size to support. The exact meaning and allowed values for + * this parameter depend on the compression type, but it at least specifies + * the maximum allowed value for @p uncompressed_size to + * wimlib_decompress(). + * @param extra_params + * An optional pointer to extra decompressor parameters for the specified + * compression type. If @c NULL, the default parameters are used. + * @param decompressor_ret + * A location into which to return the pointer to the allocated + * decompressor, which can be used for any number of calls to + * wimlib_decompress() before being freed with wimlib_free_decompressor(). + * + * @return 0 on success; nonzero on error. + * + * @retval ::WIMLIB_ERR_INVALID_COMPRESSION_TYPE + * @p ctype was not a supported compression type. + * @retval ::WIMLIB_ERR_INVALID_PARAM + * The decompression parameters were invalid. + * @retval ::WIMLIB_ERR_NOMEM + * Insufficient memory to allocate the decompressor. */ extern int -wimlib_xpress_decompress(const void *compressed_data, unsigned compressed_len, - void *uncompressed_data, unsigned uncompressed_len); +wimlib_create_decompressor(enum wimlib_compression_type ctype, + size_t max_block_size, + const struct wimlib_decompressor_params_header *extra_params, + struct wimlib_decompressor **decompressor_ret); + +/** + * @ingroup G_compression + * + * Decompress a block of data using a decompressor previously created with + * wimlib_create_decompressor(). + * + * @param compressed_data + * Buffer containing the data to decompress. + * @param compressed_size + * Size, in bytes, of the data to decompress. + * @param uncompressed_data + * Buffer into which to write the uncompressed data. + * @param uncompressed_size + * Size, in bytes, of the data when uncompressed. + * @param decompressor + * A decompressor previously allocated with wimlib_create_decompressor(). + * + * @return 0 on success; nonzero on error. + */ +extern int +wimlib_decompress(const void *compressed_data, size_t compressed_size, + void *uncompressed_data, size_t uncompressed_size, + struct wimlib_decompressor *decompressor); + +/** + * @ingroup G_compression + * + * Free a decompressor previously allocated with wimlib_create_decompressor(). + * + * @param decompressor + * The decompressor to free. + */ +extern void +wimlib_free_decompressor(struct wimlib_decompressor *decompressor); + #ifdef __cplusplus } diff --git a/include/wimlib/compress_chunks.h b/include/wimlib/chunk_compressor.h similarity index 87% rename from include/wimlib/compress_chunks.h rename to include/wimlib/chunk_compressor.h index cfd73336..176dc531 100644 --- a/include/wimlib/compress_chunks.h +++ b/include/wimlib/chunk_compressor.h @@ -1,16 +1,13 @@ -#ifndef _WIMLIB_COMPRESS_CHUNK_H -#define _WIMLIB_COMPRESS_CHUNK_H +/* + * chunk_compressor.h + * + * Interface for serial/parallel chunk compression. + */ -#include - -struct wimlib_lzx_context; +#ifndef _WIMLIB_CHUNK_COMPRESSOR_H +#define _WIMLIB_CHUNK_COMPRESSOR_H -unsigned -compress_chunk(const void * uncompressed_data, - unsigned uncompressed_len, - void *compressed_data, - int out_ctype, - struct wimlib_lzx_context *comp_ctx); +#include /* Interface for chunk compression. Users can submit chunks of data to be * compressed, then retrieve them later in order. This interface can be @@ -69,7 +66,6 @@ new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size, int new_serial_chunk_compressor(int out_ctype, u32 out_chunk_size, - struct wimlib_lzx_context *comp_ctx, struct chunk_compressor **compressor_ret); -#endif +#endif /* _WIMLIB_CHUNK_COMPRESSOR_H */ diff --git a/include/wimlib/compress.h b/include/wimlib/compress_common.h similarity index 96% rename from include/wimlib/compress.h rename to include/wimlib/compress_common.h index fcd3c411..14b32f48 100644 --- a/include/wimlib/compress.h +++ b/include/wimlib/compress_common.h @@ -1,11 +1,11 @@ /* - * compress.h + * compress_common.h * * Header for compression code shared by multiple compression formats. */ -#ifndef _WIMLIB_COMPRESS_H -#define _WIMLIB_COMPRESS_H +#ifndef _WIMLIB_COMPRESS_COMMON_H +#define _WIMLIB_COMPRESS_COMMON_H #include "wimlib/types.h" diff --git a/include/wimlib/compressor_ops.h b/include/wimlib/compressor_ops.h new file mode 100644 index 00000000..4c36bb36 --- /dev/null +++ b/include/wimlib/compressor_ops.h @@ -0,0 +1,34 @@ +/* + * compressor_ops.h + * + * Interface implemented by compressors for specific formats. + */ + +#ifndef _WIMLIB_COMPRESSOR_OPS_H +#define _WIMLIB_COMPRESSOR_OPS_H + +#include + +struct compressor_ops { + + int (*create_compressor)(size_t max_block_size, + const struct wimlib_compressor_params_header *extra_params, + void **private_ret); + + size_t (*compress)(const void *uncompressed_data, + size_t uncompressed_size, + void *compressed_data, + size_t compressed_size_avail, + void *private); + + void (*free_compressor)(void *private); +}; + +extern const struct compressor_ops lzx_compressor_ops; +extern const struct compressor_ops xpress_compressor_ops; +extern const struct compressor_ops lzms_compressor_ops; + +extern void +cleanup_compressor_params(void); + +#endif /* _WIMLIB_COMPRESSOR_OPS_H */ diff --git a/include/wimlib/decompress.h b/include/wimlib/decompress_common.h similarity index 98% rename from include/wimlib/decompress.h rename to include/wimlib/decompress_common.h index ec8804e6..4b5a7e3c 100644 --- a/include/wimlib/decompress.h +++ b/include/wimlib/decompress_common.h @@ -1,11 +1,11 @@ /* - * decompress.h + * decompress_common.h * * Header for decompression code shared by multiple compression formats. */ -#ifndef _WIMLIB_DECOMPRESS_H -#define _WIMLIB_DECOMPRESS_H +#ifndef _WIMLIB_DECOMPRESS_COMMON_H +#define _WIMLIB_DECOMPRESS_COMMON_H #include "wimlib/assert.h" #include "wimlib/compiler.h" diff --git a/include/wimlib/decompressor_ops.h b/include/wimlib/decompressor_ops.h new file mode 100644 index 00000000..9e642853 --- /dev/null +++ b/include/wimlib/decompressor_ops.h @@ -0,0 +1,34 @@ +/* + * decompressor_ops.h + * + * Interface implemented by decompressors for specific formats. + */ + +#ifndef _WIMLIB_DECOMPRESSOR_OPS_H +#define _WIMLIB_DECOMPRESSOR_OPS_H + +#include + +struct decompressor_ops { + + int (*create_decompressor)(size_t max_block_size, + const struct wimlib_decompressor_params_header *extra_params, + void **private_ret); + + int (*decompress)(const void *compressed_data, + size_t compressed_size, + void *uncompressed_data, + size_t uncompressed_size, + void *private); + + void (*free_decompressor)(void *private); +}; + +extern const struct decompressor_ops lzx_decompressor_ops; +extern const struct decompressor_ops xpress_decompressor_ops; +extern const struct decompressor_ops lzms_decompressor_ops; + +extern void +cleanup_decompressor_params(void); + +#endif /* _WIMLIB_DECOMPRESSOR_OPS_H */ diff --git a/include/wimlib/lookup_table.h b/include/wimlib/lookup_table.h index f99f38d4..c9637385 100644 --- a/include/wimlib/lookup_table.h +++ b/include/wimlib/lookup_table.h @@ -303,8 +303,7 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, struct filedes *out_fd, u16 part_number, struct wim_reshdr *out_reshdr, - int write_resource_flags, - struct wimlib_lzx_context **comp_ctx); + int write_resource_flags); extern void free_lookup_table(struct wim_lookup_table *table); diff --git a/include/wimlib/lzx.h b/include/wimlib/lzx.h index 1332645f..28408b32 100644 --- a/include/wimlib/lzx.h +++ b/include/wimlib/lzx.h @@ -134,7 +134,7 @@ lzx_get_position_slot_raw(unsigned formatted_offset) } } -extern bool lzx_window_size_valid(u32 window_size); +extern bool lzx_window_size_valid(size_t window_size); extern unsigned lzx_get_num_main_syms(u32 window_size); #define LZX_NUM_RECENT_OFFSETS 3 diff --git a/include/wimlib/wim.h b/include/wimlib/wim.h index 83b1dbcb..99a37362 100644 --- a/include/wimlib/wim.h +++ b/include/wimlib/wim.h @@ -42,8 +42,9 @@ struct WIMStruct { /* Temporary field */ void *private; - /* LZX compression context for default thread */ - struct wimlib_lzx_context *lzx_context; + struct wimlib_decompressor *decompressor; + enum wimlib_compression_type decompressor_ctype; + u32 decompressor_max_block_size; struct list_head subwims; diff --git a/include/wimlib/write.h b/include/wimlib/write.h index 9857fc03..3665eea4 100644 --- a/include/wimlib/write.h +++ b/include/wimlib/write.h @@ -47,7 +47,6 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size, u32 out_chunk_size, struct wim_reshdr *out_reshdr, u8 *hash, - int write_resource_flags, - struct wimlib_lzx_context **comp_ctx); + int write_resource_flags); #endif /* _WIMLIB_WRITE_H */ diff --git a/programs/imagex.c b/programs/imagex.c index b2164402..33274616 100644 --- a/programs/imagex.c +++ b/programs/imagex.c @@ -434,8 +434,10 @@ static int set_compress_slow(void) { int ret; - static const struct wimlib_lzx_params slow_params = { - .size_of_this = sizeof(struct wimlib_lzx_params), + static const struct wimlib_lzx_compressor_params slow_params = { + .hdr = { + .size = sizeof(struct wimlib_lzx_compressor_params), + }, .algorithm = WIMLIB_LZX_ALGORITHM_SLOW, .alg_params = { .slow = { @@ -450,7 +452,8 @@ set_compress_slow(void) }, }, }; - ret = wimlib_lzx_set_default_params(&slow_params); + ret = wimlib_set_default_compressor_params(WIMLIB_COMPRESSION_TYPE_LZX, + &slow_params.hdr); if (ret) imagex_error(T("Couldn't set slow compression parameters.!")); return ret; @@ -1866,13 +1869,14 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) /* Set default compression type. */ if (compression_type == WIMLIB_COMPRESSION_TYPE_INVALID) { - struct wimlib_lzx_params params; + struct wimlib_lzx_compressor_params params; memset(¶ms, 0, sizeof(params)); - params.size_of_this = sizeof(params); + params.hdr.size = sizeof(params); params.algorithm = WIMLIB_LZX_ALGORITHM_FAST; params.use_defaults = 1; - wimlib_lzx_set_default_params(¶ms); + wimlib_set_default_compressor_params(WIMLIB_COMPRESSION_TYPE_LZX, + ¶ms.hdr); compression_type = WIMLIB_COMPRESSION_TYPE_LZX; } diff --git a/src/compress.c b/src/compress.c index 79fe4ddf..32d5fb49 100644 --- a/src/compress.c +++ b/src/compress.c @@ -1,11 +1,12 @@ /* * compress.c * - * Functions used for compression. + * Generic functions for compression, wrapping around actual compression + * implementations. */ /* - * Copyright (C) 2012, 2013 Eric Biggers + * Copyright (C) 2013 Eric Biggers * * This file is part of wimlib, a library for working with WIM files. * @@ -27,445 +28,120 @@ # include "config.h" #endif -#include "wimlib/assert.h" -#include "wimlib/endianness.h" -#include "wimlib/compiler.h" -#include "wimlib/compress.h" +#include "wimlib.h" +#include "wimlib/compressor_ops.h" #include "wimlib/util.h" -#include -#include - -/* Writes @num_bits bits, given by the @num_bits least significant bits of - * @bits, to the output @ostream. */ -void -bitstream_put_bits(struct output_bitstream *ostream, u32 bits, - unsigned num_bits) +struct wimlib_compressor { + const struct compressor_ops *ops; + void *private; +}; + +static const struct compressor_ops *compressor_ops[] = { + [WIMLIB_COMPRESSION_TYPE_LZX] = &lzx_compressor_ops, + [WIMLIB_COMPRESSION_TYPE_XPRESS] = &xpress_compressor_ops, + [WIMLIB_COMPRESSION_TYPE_LZMS] = &lzms_compressor_ops, +}; + +static struct wimlib_compressor_params_header * +compressor_default_params[ARRAY_LEN(compressor_ops)] = { + [WIMLIB_COMPRESSION_TYPE_LZX] = NULL, + [WIMLIB_COMPRESSION_TYPE_XPRESS] = NULL, + [WIMLIB_COMPRESSION_TYPE_LZMS] = NULL, +}; + +static bool +compressor_ctype_valid(int ctype) { - bits &= (1U << num_bits) - 1; - while (num_bits > ostream->free_bits) { - /* Buffer variable does not have space for the new bits. It - * needs to be flushed as a 16-bit integer. Bits in the second - * byte logically precede those in the first byte - * (little-endian), but within each byte the bits are ordered - * from high to low. This is true for both XPRESS and LZX - * compression. */ - - /* There must be at least 2 bytes of space remaining. */ - if (unlikely(ostream->bytes_remaining < 2)) { - ostream->overrun = true; - return; - } - - /* Fill the buffer with as many bits that fit. */ - unsigned fill_bits = ostream->free_bits; - - ostream->bitbuf <<= fill_bits; - ostream->bitbuf |= bits >> (num_bits - fill_bits); - - *(le16*)ostream->bit_output = cpu_to_le16(ostream->bitbuf); - ostream->bit_output = ostream->next_bit_output; - ostream->next_bit_output = ostream->output; - ostream->output += 2; - ostream->bytes_remaining -= 2; - - ostream->free_bits = 16; - num_bits -= fill_bits; - bits &= (1U << num_bits) - 1; - } - - /* Buffer variable has space for the new bits. */ - ostream->bitbuf = (ostream->bitbuf << num_bits) | bits; - ostream->free_bits -= num_bits; + return (ctype >= 0 && + ctype < ARRAY_LEN(compressor_ops) && + compressor_ops[ctype] != NULL); } -void -bitstream_put_byte(struct output_bitstream *ostream, u8 n) +WIMLIBAPI int +wimlib_set_default_compressor_params(enum wimlib_compression_type ctype, + const struct wimlib_compressor_params_header *params) { - if (unlikely(ostream->bytes_remaining < 1)) { - ostream->overrun = true; - return; - } - *ostream->output++ = n; - ostream->bytes_remaining--; -} + struct wimlib_compressor_params_header *dup; -/* Flushes any remaining bits to the output bitstream. - * - * Returns -1 if the stream has overrun; otherwise returns the total number of - * bytes in the output. */ -input_idx_t -flush_output_bitstream(struct output_bitstream *ostream) -{ - if (unlikely(ostream->overrun)) - return ~(input_idx_t)0; + if (!compressor_ctype_valid(ctype)) + return WIMLIB_ERR_INVALID_COMPRESSION_TYPE; - *(le16*)ostream->bit_output = - cpu_to_le16((u16)((u32)ostream->bitbuf << ostream->free_bits)); - *(le16*)ostream->next_bit_output = - cpu_to_le16(0); + dup = NULL; + if (params) { + dup = memdup(params, params->size); + if (dup == NULL) + return WIMLIB_ERR_NOMEM; + } - return ostream->output - ostream->output_start; + FREE(compressor_default_params[ctype]); + compressor_default_params[ctype] = dup; + return 0; } -/* Initializes an output bit buffer to write its output to the memory location - * pointer to by @data. */ void -init_output_bitstream(struct output_bitstream *ostream, - void *data, unsigned num_bytes) -{ - wimlib_assert(num_bytes >= 4); - - ostream->bitbuf = 0; - ostream->free_bits = 16; - ostream->output_start = data; - ostream->bit_output = data; - ostream->next_bit_output = data + 2; - ostream->output = data + 4; - ostream->bytes_remaining = num_bytes - 4; - ostream->overrun = false; -} - -typedef struct { - input_idx_t freq; - u16 sym; - union { - u16 path_len; - u16 height; - }; -} HuffmanNode; - -typedef struct HuffmanIntermediateNode { - HuffmanNode node_base; - HuffmanNode *left_child; - HuffmanNode *right_child; -} HuffmanIntermediateNode; - - -/* Comparator function for HuffmanNodes. Sorts primarily by symbol - * frequency and secondarily by symbol value. */ -static int -cmp_nodes_by_freq(const void *_leaf1, const void *_leaf2) +cleanup_compressor_params(void) { - const HuffmanNode *leaf1 = _leaf1; - const HuffmanNode *leaf2 = _leaf2; - - if (leaf1->freq > leaf2->freq) - return 1; - else if (leaf1->freq < leaf2->freq) - return -1; - else - return (int)leaf1->sym - (int)leaf2->sym; + for (size_t i = 0; i < ARRAY_LEN(compressor_default_params); i++) { + FREE(compressor_default_params[i]); + compressor_default_params[i] = NULL; + } } -/* Comparator function for HuffmanNodes. Sorts primarily by code length and - * secondarily by symbol value. */ -static int -cmp_nodes_by_code_len(const void *_leaf1, const void *_leaf2) +WIMLIBAPI int +wimlib_create_compressor(enum wimlib_compression_type ctype, + size_t max_block_size, + const struct wimlib_compressor_params_header *extra_params, + struct wimlib_compressor **c_ret) { - const HuffmanNode *leaf1 = _leaf1; - const HuffmanNode *leaf2 = _leaf2; - - int code_len_diff = (int)leaf1->path_len - (int)leaf2->path_len; - - if (code_len_diff == 0) - return (int)leaf1->sym - (int)leaf2->sym; - else - return code_len_diff; + struct wimlib_compressor *c; + + if (c_ret == NULL) + return WIMLIB_ERR_INVALID_PARAM; + + if (!compressor_ctype_valid(ctype)) + return WIMLIB_ERR_INVALID_COMPRESSION_TYPE; + + c = MALLOC(sizeof(*c)); + if (c == NULL) + return WIMLIB_ERR_NOMEM; + c->ops = compressor_ops[ctype]; + c->private = NULL; + if (c->ops->create_compressor) { + const struct wimlib_compressor_params_header *params; + int ret; + + if (extra_params) + params = extra_params; + else + params = compressor_default_params[ctype]; + ret = c->ops->create_compressor(max_block_size, + params, &c->private); + if (ret) { + FREE(c); + return ret; + } + } + *c_ret = c; + return 0; } -#define INVALID_SYMBOL 0xffff - -/* Recursive function to calculate the depth of the leaves in a Huffman tree. - * */ -static void -huffman_tree_compute_path_lengths(HuffmanNode *base_node, u16 cur_len) +WIMLIBAPI size_t +wimlib_compress(const void *uncompressed_data, size_t uncompressed_size, + void *compressed_data, size_t compressed_size_avail, + struct wimlib_compressor *c) { - if (base_node->sym == INVALID_SYMBOL) { - /* Intermediate node. */ - HuffmanIntermediateNode *node = (HuffmanIntermediateNode*)base_node; - huffman_tree_compute_path_lengths(node->left_child, cur_len + 1); - huffman_tree_compute_path_lengths(node->right_child, cur_len + 1); - } else { - /* Leaf node. */ - base_node->path_len = cur_len; - } + return c->ops->compress(uncompressed_data, uncompressed_size, + compressed_data, compressed_size_avail, + c->private); } -/* make_canonical_huffman_code: - Creates a canonical Huffman code from an array - * of symbol frequencies. - * - * The algorithm used is similar to the well-known algorithm that builds a - * Huffman tree using a minheap. In that algorithm, the leaf nodes are - * initialized and inserted into the minheap with the frequency as the key. - * Repeatedly, the top two nodes (nodes with the lowest frequency) are taken out - * of the heap and made the children of a new node that has a frequency equal to - * the sum of the two frequencies of its children. This new node is inserted - * into the heap. When all the nodes have been removed from the heap, what - * remains is the Huffman tree. The Huffman code for a symbol is given by the - * path to it in the tree, where each left pointer is mapped to a 0 bit and each - * right pointer is mapped to a 1 bit. - * - * The algorithm used here uses an optimization that removes the need to - * actually use a heap. The leaf nodes are first sorted by frequency, as - * opposed to being made into a heap. Note that this sorting step takes O(n log - * n) time vs. O(n) time for heapifying the array, where n is the number of - * symbols. However, the heapless method is probably faster overall, due to the - * time saved later. In the heapless method, whenever an intermediate node is - * created, it is not inserted into the sorted array. Instead, the intermediate - * nodes are kept in a separate array, which is easily kept sorted because every - * time an intermediate node is initialized, it will have a frequency at least - * as high as that of the previous intermediate node that was initialized. So - * whenever we want the 2 nodes, leaf or intermediate, that have the lowest - * frequency, we check the low-frequency ends of both arrays, which is an O(1) - * operation. - * - * The function builds a canonical Huffman code, not just any Huffman code. A - * Huffman code is canonical if the codeword for each symbol numerically - * precedes the codeword for all other symbols of the same length that are - * numbered higher than the symbol, and additionally, all shorter codewords, - * 0-extended, numerically precede longer codewords. A canonical Huffman code - * is useful because it can be reconstructed by only knowing the path lengths in - * the tree. See the make_huffman_decode_table() function to see how to - * reconstruct a canonical Huffman code from only the lengths of the codes. - * - * @num_syms: The number of symbols in the alphabet. - * - * @max_codeword_len: The maximum allowed length of a codeword in the code. - * Note that if the code being created runs up against - * this restriction, the code ultimately created will be - * suboptimal, although there are some advantages for - * limiting the length of the codewords. - * - * @freq_tab: An array of length @num_syms that contains the frequencies - * of each symbol in the uncompressed data. - * - * @lens: An array of length @num_syms into which the lengths of the - * codewords for each symbol will be written. - * - * @codewords: An array of @num_syms short integers into which the - * codewords for each symbol will be written. The first - * lens[i] bits of codewords[i] will contain the codeword - * for symbol i. - */ -void -make_canonical_huffman_code(unsigned num_syms, - unsigned max_codeword_len, - const input_idx_t freq_tab[restrict], - u8 lens[restrict], - u16 codewords[restrict]) +WIMLIBAPI void +wimlib_free_compressor(struct wimlib_compressor *c) { - /* We require at least 2 possible symbols in the alphabet to produce a - * valid Huffman decoding table. It is allowed that fewer than 2 symbols - * are actually used, though. */ - wimlib_assert(num_syms >= 2 && num_syms < INVALID_SYMBOL); - - /* Initialize the lengths and codewords to 0 */ - memset(lens, 0, num_syms * sizeof(lens[0])); - memset(codewords, 0, num_syms * sizeof(codewords[0])); - - /* Calculate how many symbols have non-zero frequency. These are the - * symbols that actually appeared in the input. */ - unsigned num_used_symbols = 0; - for (unsigned i = 0; i < num_syms; i++) - if (freq_tab[i] != 0) - num_used_symbols++; - - - /* It is impossible to make a code for num_used_symbols symbols if there - * aren't enough code bits to uniquely represent all of them. */ - wimlib_assert((1 << max_codeword_len) > num_used_symbols); - - /* Initialize the array of leaf nodes with the symbols and their - * frequencies. */ - HuffmanNode leaves[num_used_symbols]; - unsigned leaf_idx = 0; - for (unsigned i = 0; i < num_syms; i++) { - if (freq_tab[i] != 0) { - leaves[leaf_idx].freq = freq_tab[i]; - leaves[leaf_idx].sym = i; - leaves[leaf_idx].height = 0; - leaf_idx++; - } - } - - /* Deal with the special cases where num_used_symbols < 2. */ - if (num_used_symbols < 2) { - if (num_used_symbols == 0) { - /* If num_used_symbols is 0, there are no symbols in the - * input, so it must be empty. This should be an error, - * but the LZX format expects this case to succeed. All - * the codeword lengths are simply marked as 0 (which - * was already done.) */ - } else { - /* If only one symbol is present, the LZX format - * requires that the Huffman code include two codewords. - * One is not used. Note that this doesn't make the - * encoded data take up more room anyway, since binary - * data itself has 2 symbols. */ - - unsigned sym = leaves[0].sym; - - codewords[0] = 0; - lens[0] = 1; - if (sym == 0) { - /* dummy symbol is 1, real symbol is 0 */ - codewords[1] = 1; - lens[1] = 1; - } else { - /* dummy symbol is 0, real symbol is sym */ - codewords[sym] = 1; - lens[sym] = 1; - } - } - return; - } - - /* Otherwise, there are at least 2 symbols in the input, so we need to - * find a real Huffman code. */ - - - /* Declare the array of intermediate nodes. An intermediate node is not - * associated with a symbol. Instead, it represents some binary code - * prefix that is shared between at least 2 codewords. There can be at - * most num_used_symbols - 1 intermediate nodes when creating a Huffman - * code. This is because if there were at least num_used_symbols nodes, - * the code would be suboptimal because there would be at least one - * unnecessary intermediate node. - * - * The worst case (greatest number of intermediate nodes) would be if - * all the intermediate nodes were chained together. This results in - * num_used_symbols - 1 intermediate nodes. If num_used_symbols is at - * least 17, this configuration would not be allowed because the LZX - * format constrains codes to 16 bits or less each. However, it is - * still possible for there to be more than 16 intermediate nodes, as - * long as no leaf has a depth of more than 16. */ - HuffmanIntermediateNode inodes[num_used_symbols - 1]; - - - /* Pointer to the leaf node of lowest frequency that hasn't already been - * added as the child of some intermediate note. */ - HuffmanNode *cur_leaf; - - /* Pointer past the end of the array of leaves. */ - HuffmanNode *end_leaf = &leaves[num_used_symbols]; - - /* Pointer to the intermediate node of lowest frequency. */ - HuffmanIntermediateNode *cur_inode; - - /* Pointer to the next unallocated intermediate node. */ - HuffmanIntermediateNode *next_inode; - - /* Only jump back to here if the maximum length of the codewords allowed - * by the LZX format (16 bits) is exceeded. */ -try_building_tree_again: - - /* Sort the leaves from those that correspond to the least frequent - * symbol, to those that correspond to the most frequent symbol. If two - * leaves have the same frequency, they are sorted by symbol. */ - qsort(leaves, num_used_symbols, sizeof(leaves[0]), cmp_nodes_by_freq); - - cur_leaf = &leaves[0]; - cur_inode = &inodes[0]; - next_inode = &inodes[0]; - - /* The following loop takes the two lowest frequency nodes of those - * remaining and makes them the children of the next available - * intermediate node. It continues until all the leaf nodes and - * intermediate nodes have been used up, or the maximum allowed length - * for the codewords is exceeded. For the latter case, we must adjust - * the frequencies to be more equal and then execute this loop again. */ - while (1) { - - /* Lowest frequency node. */ - HuffmanNode *f1; - - /* Second lowest frequency node. */ - HuffmanNode *f2; - - /* Get the lowest and second lowest frequency nodes from the - * remaining leaves or from the intermediate nodes. */ - - if (cur_leaf != end_leaf && (cur_inode == next_inode || - cur_leaf->freq <= cur_inode->node_base.freq)) { - f1 = cur_leaf++; - } else if (cur_inode != next_inode) { - f1 = (HuffmanNode*)cur_inode++; - } - - if (cur_leaf != end_leaf && (cur_inode == next_inode || - cur_leaf->freq <= cur_inode->node_base.freq)) { - f2 = cur_leaf++; - } else if (cur_inode != next_inode) { - f2 = (HuffmanNode*)cur_inode++; - } else { - /* All nodes used up! */ - break; - } - - /* next_inode becomes the parent of f1 and f2. */ - - next_inode->node_base.freq = f1->freq + f2->freq; - next_inode->node_base.sym = INVALID_SYMBOL; - next_inode->left_child = f1; - next_inode->right_child = f2; - - /* We need to keep track of the height so that we can detect if - * the length of a codeword has execeed max_codeword_len. The - * parent node has a height one higher than the maximum height - * of its children. */ - next_inode->node_base.height = max(f1->height, f2->height) + 1; - - /* Check to see if the code length of the leaf farthest away - * from next_inode has exceeded the maximum code length. */ - if (next_inode->node_base.height > max_codeword_len) { - /* The code lengths can be made more uniform by making - * the frequencies more uniform. Divide all the - * frequencies by 2, leaving 1 as the minimum frequency. - * If this keeps happening, the symbol frequencies will - * approach equality, which makes their Huffman - * codewords approach the length - * log_2(num_used_symbols). - * */ - for (unsigned i = 0; i < num_used_symbols; i++) - leaves[i].freq = (leaves[i].freq + 1) >> 1; - - goto try_building_tree_again; - } - next_inode++; - } - - /* The Huffman tree is now complete, and its height is no more than - * max_codeword_len. */ - - HuffmanIntermediateNode *root = next_inode - 1; - wimlib_assert(root->node_base.height <= max_codeword_len); - - /* Compute the path lengths for the leaf nodes. */ - huffman_tree_compute_path_lengths(&root->node_base, 0); - - /* Sort the leaf nodes primarily by code length and secondarily by - * symbol. */ - qsort(leaves, num_used_symbols, sizeof(leaves[0]), cmp_nodes_by_code_len); - - u16 cur_codeword = 0; - unsigned cur_codeword_len = 0; - for (unsigned i = 0; i < num_used_symbols; i++) { - - /* Each time a codeword becomes one longer, the current codeword - * is left shifted by one place. This is part of the procedure - * for enumerating the canonical Huffman code. Additionally, - * whenever a codeword is used, 1 is added to the current - * codeword. */ - - unsigned len_diff = leaves[i].path_len - cur_codeword_len; - cur_codeword <<= len_diff; - cur_codeword_len += len_diff; - - u16 sym = leaves[i].sym; - codewords[sym] = cur_codeword; - lens[sym] = cur_codeword_len; - - cur_codeword++; + if (c) { + if (c->ops->free_compressor) + c->ops->free_compressor(c->private); + FREE(c); } } diff --git a/src/compress_chunk.c b/src/compress_chunk.c deleted file mode 100644 index bd92cd6a..00000000 --- a/src/compress_chunk.c +++ /dev/null @@ -1,35 +0,0 @@ -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include "wimlib.h" -#include "wimlib/compress_chunks.h" -#include "wimlib/error.h" -#include "wimlib/assert.h" - -unsigned -compress_chunk(const void * uncompressed_data, - unsigned uncompressed_len, - void *compressed_data, - int out_ctype, - struct wimlib_lzx_context *comp_ctx) -{ - switch (out_ctype) { - case WIMLIB_COMPRESSION_TYPE_XPRESS: - return wimlib_xpress_compress(uncompressed_data, - uncompressed_len, - compressed_data); - case WIMLIB_COMPRESSION_TYPE_LZX: - return wimlib_lzx_compress2(uncompressed_data, - uncompressed_len, - compressed_data, - comp_ctx); - case WIMLIB_COMPRESSION_TYPE_LZMS: - return 0; - - default: - wimlib_assert(0); - WARNING("Unknown compression type!"); - return 0; - } -} diff --git a/src/compress_common.c b/src/compress_common.c new file mode 100644 index 00000000..9659d07b --- /dev/null +++ b/src/compress_common.c @@ -0,0 +1,471 @@ +/* + * compress_common.c + * + * Code for compression shared among multiple compression formats. + */ + +/* + * Copyright (C) 2012, 2013 Eric Biggers + * + * This file is part of wimlib, a library for working with WIM files. + * + * wimlib is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) + * any later version. + * + * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with wimlib; if not, see http://www.gnu.org/licenses/. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wimlib/assert.h" +#include "wimlib/endianness.h" +#include "wimlib/compiler.h" +#include "wimlib/compress_common.h" +#include "wimlib/util.h" + +#include +#include + +/* Writes @num_bits bits, given by the @num_bits least significant bits of + * @bits, to the output @ostream. */ +void +bitstream_put_bits(struct output_bitstream *ostream, u32 bits, + unsigned num_bits) +{ + bits &= (1U << num_bits) - 1; + while (num_bits > ostream->free_bits) { + /* Buffer variable does not have space for the new bits. It + * needs to be flushed as a 16-bit integer. Bits in the second + * byte logically precede those in the first byte + * (little-endian), but within each byte the bits are ordered + * from high to low. This is true for both XPRESS and LZX + * compression. */ + + /* There must be at least 2 bytes of space remaining. */ + if (unlikely(ostream->bytes_remaining < 2)) { + ostream->overrun = true; + return; + } + + /* Fill the buffer with as many bits that fit. */ + unsigned fill_bits = ostream->free_bits; + + ostream->bitbuf <<= fill_bits; + ostream->bitbuf |= bits >> (num_bits - fill_bits); + + *(le16*)ostream->bit_output = cpu_to_le16(ostream->bitbuf); + ostream->bit_output = ostream->next_bit_output; + ostream->next_bit_output = ostream->output; + ostream->output += 2; + ostream->bytes_remaining -= 2; + + ostream->free_bits = 16; + num_bits -= fill_bits; + bits &= (1U << num_bits) - 1; + } + + /* Buffer variable has space for the new bits. */ + ostream->bitbuf = (ostream->bitbuf << num_bits) | bits; + ostream->free_bits -= num_bits; +} + +void +bitstream_put_byte(struct output_bitstream *ostream, u8 n) +{ + if (unlikely(ostream->bytes_remaining < 1)) { + ostream->overrun = true; + return; + } + *ostream->output++ = n; + ostream->bytes_remaining--; +} + +/* Flushes any remaining bits to the output bitstream. + * + * Returns -1 if the stream has overrun; otherwise returns the total number of + * bytes in the output. */ +input_idx_t +flush_output_bitstream(struct output_bitstream *ostream) +{ + if (unlikely(ostream->overrun)) + return ~(input_idx_t)0; + + *(le16*)ostream->bit_output = + cpu_to_le16((u16)((u32)ostream->bitbuf << ostream->free_bits)); + *(le16*)ostream->next_bit_output = + cpu_to_le16(0); + + return ostream->output - ostream->output_start; +} + +/* Initializes an output bit buffer to write its output to the memory location + * pointer to by @data. */ +void +init_output_bitstream(struct output_bitstream *ostream, + void *data, unsigned num_bytes) +{ + wimlib_assert(num_bytes >= 4); + + ostream->bitbuf = 0; + ostream->free_bits = 16; + ostream->output_start = data; + ostream->bit_output = data; + ostream->next_bit_output = data + 2; + ostream->output = data + 4; + ostream->bytes_remaining = num_bytes - 4; + ostream->overrun = false; +} + +typedef struct { + input_idx_t freq; + u16 sym; + union { + u16 path_len; + u16 height; + }; +} HuffmanNode; + +typedef struct HuffmanIntermediateNode { + HuffmanNode node_base; + HuffmanNode *left_child; + HuffmanNode *right_child; +} HuffmanIntermediateNode; + + +/* Comparator function for HuffmanNodes. Sorts primarily by symbol + * frequency and secondarily by symbol value. */ +static int +cmp_nodes_by_freq(const void *_leaf1, const void *_leaf2) +{ + const HuffmanNode *leaf1 = _leaf1; + const HuffmanNode *leaf2 = _leaf2; + + if (leaf1->freq > leaf2->freq) + return 1; + else if (leaf1->freq < leaf2->freq) + return -1; + else + return (int)leaf1->sym - (int)leaf2->sym; +} + +/* Comparator function for HuffmanNodes. Sorts primarily by code length and + * secondarily by symbol value. */ +static int +cmp_nodes_by_code_len(const void *_leaf1, const void *_leaf2) +{ + const HuffmanNode *leaf1 = _leaf1; + const HuffmanNode *leaf2 = _leaf2; + + int code_len_diff = (int)leaf1->path_len - (int)leaf2->path_len; + + if (code_len_diff == 0) + return (int)leaf1->sym - (int)leaf2->sym; + else + return code_len_diff; +} + +#define INVALID_SYMBOL 0xffff + +/* Recursive function to calculate the depth of the leaves in a Huffman tree. + * */ +static void +huffman_tree_compute_path_lengths(HuffmanNode *base_node, u16 cur_len) +{ + if (base_node->sym == INVALID_SYMBOL) { + /* Intermediate node. */ + HuffmanIntermediateNode *node = (HuffmanIntermediateNode*)base_node; + huffman_tree_compute_path_lengths(node->left_child, cur_len + 1); + huffman_tree_compute_path_lengths(node->right_child, cur_len + 1); + } else { + /* Leaf node. */ + base_node->path_len = cur_len; + } +} + +/* make_canonical_huffman_code: - Creates a canonical Huffman code from an array + * of symbol frequencies. + * + * The algorithm used is similar to the well-known algorithm that builds a + * Huffman tree using a minheap. In that algorithm, the leaf nodes are + * initialized and inserted into the minheap with the frequency as the key. + * Repeatedly, the top two nodes (nodes with the lowest frequency) are taken out + * of the heap and made the children of a new node that has a frequency equal to + * the sum of the two frequencies of its children. This new node is inserted + * into the heap. When all the nodes have been removed from the heap, what + * remains is the Huffman tree. The Huffman code for a symbol is given by the + * path to it in the tree, where each left pointer is mapped to a 0 bit and each + * right pointer is mapped to a 1 bit. + * + * The algorithm used here uses an optimization that removes the need to + * actually use a heap. The leaf nodes are first sorted by frequency, as + * opposed to being made into a heap. Note that this sorting step takes O(n log + * n) time vs. O(n) time for heapifying the array, where n is the number of + * symbols. However, the heapless method is probably faster overall, due to the + * time saved later. In the heapless method, whenever an intermediate node is + * created, it is not inserted into the sorted array. Instead, the intermediate + * nodes are kept in a separate array, which is easily kept sorted because every + * time an intermediate node is initialized, it will have a frequency at least + * as high as that of the previous intermediate node that was initialized. So + * whenever we want the 2 nodes, leaf or intermediate, that have the lowest + * frequency, we check the low-frequency ends of both arrays, which is an O(1) + * operation. + * + * The function builds a canonical Huffman code, not just any Huffman code. A + * Huffman code is canonical if the codeword for each symbol numerically + * precedes the codeword for all other symbols of the same length that are + * numbered higher than the symbol, and additionally, all shorter codewords, + * 0-extended, numerically precede longer codewords. A canonical Huffman code + * is useful because it can be reconstructed by only knowing the path lengths in + * the tree. See the make_huffman_decode_table() function to see how to + * reconstruct a canonical Huffman code from only the lengths of the codes. + * + * @num_syms: The number of symbols in the alphabet. + * + * @max_codeword_len: The maximum allowed length of a codeword in the code. + * Note that if the code being created runs up against + * this restriction, the code ultimately created will be + * suboptimal, although there are some advantages for + * limiting the length of the codewords. + * + * @freq_tab: An array of length @num_syms that contains the frequencies + * of each symbol in the uncompressed data. + * + * @lens: An array of length @num_syms into which the lengths of the + * codewords for each symbol will be written. + * + * @codewords: An array of @num_syms short integers into which the + * codewords for each symbol will be written. The first + * lens[i] bits of codewords[i] will contain the codeword + * for symbol i. + */ +void +make_canonical_huffman_code(unsigned num_syms, + unsigned max_codeword_len, + const input_idx_t freq_tab[restrict], + u8 lens[restrict], + u16 codewords[restrict]) +{ + /* We require at least 2 possible symbols in the alphabet to produce a + * valid Huffman decoding table. It is allowed that fewer than 2 symbols + * are actually used, though. */ + wimlib_assert(num_syms >= 2 && num_syms < INVALID_SYMBOL); + + /* Initialize the lengths and codewords to 0 */ + memset(lens, 0, num_syms * sizeof(lens[0])); + memset(codewords, 0, num_syms * sizeof(codewords[0])); + + /* Calculate how many symbols have non-zero frequency. These are the + * symbols that actually appeared in the input. */ + unsigned num_used_symbols = 0; + for (unsigned i = 0; i < num_syms; i++) + if (freq_tab[i] != 0) + num_used_symbols++; + + + /* It is impossible to make a code for num_used_symbols symbols if there + * aren't enough code bits to uniquely represent all of them. */ + wimlib_assert((1 << max_codeword_len) > num_used_symbols); + + /* Initialize the array of leaf nodes with the symbols and their + * frequencies. */ + HuffmanNode leaves[num_used_symbols]; + unsigned leaf_idx = 0; + for (unsigned i = 0; i < num_syms; i++) { + if (freq_tab[i] != 0) { + leaves[leaf_idx].freq = freq_tab[i]; + leaves[leaf_idx].sym = i; + leaves[leaf_idx].height = 0; + leaf_idx++; + } + } + + /* Deal with the special cases where num_used_symbols < 2. */ + if (num_used_symbols < 2) { + if (num_used_symbols == 0) { + /* If num_used_symbols is 0, there are no symbols in the + * input, so it must be empty. This should be an error, + * but the LZX format expects this case to succeed. All + * the codeword lengths are simply marked as 0 (which + * was already done.) */ + } else { + /* If only one symbol is present, the LZX format + * requires that the Huffman code include two codewords. + * One is not used. Note that this doesn't make the + * encoded data take up more room anyway, since binary + * data itself has 2 symbols. */ + + unsigned sym = leaves[0].sym; + + codewords[0] = 0; + lens[0] = 1; + if (sym == 0) { + /* dummy symbol is 1, real symbol is 0 */ + codewords[1] = 1; + lens[1] = 1; + } else { + /* dummy symbol is 0, real symbol is sym */ + codewords[sym] = 1; + lens[sym] = 1; + } + } + return; + } + + /* Otherwise, there are at least 2 symbols in the input, so we need to + * find a real Huffman code. */ + + + /* Declare the array of intermediate nodes. An intermediate node is not + * associated with a symbol. Instead, it represents some binary code + * prefix that is shared between at least 2 codewords. There can be at + * most num_used_symbols - 1 intermediate nodes when creating a Huffman + * code. This is because if there were at least num_used_symbols nodes, + * the code would be suboptimal because there would be at least one + * unnecessary intermediate node. + * + * The worst case (greatest number of intermediate nodes) would be if + * all the intermediate nodes were chained together. This results in + * num_used_symbols - 1 intermediate nodes. If num_used_symbols is at + * least 17, this configuration would not be allowed because the LZX + * format constrains codes to 16 bits or less each. However, it is + * still possible for there to be more than 16 intermediate nodes, as + * long as no leaf has a depth of more than 16. */ + HuffmanIntermediateNode inodes[num_used_symbols - 1]; + + + /* Pointer to the leaf node of lowest frequency that hasn't already been + * added as the child of some intermediate note. */ + HuffmanNode *cur_leaf; + + /* Pointer past the end of the array of leaves. */ + HuffmanNode *end_leaf = &leaves[num_used_symbols]; + + /* Pointer to the intermediate node of lowest frequency. */ + HuffmanIntermediateNode *cur_inode; + + /* Pointer to the next unallocated intermediate node. */ + HuffmanIntermediateNode *next_inode; + + /* Only jump back to here if the maximum length of the codewords allowed + * by the LZX format (16 bits) is exceeded. */ +try_building_tree_again: + + /* Sort the leaves from those that correspond to the least frequent + * symbol, to those that correspond to the most frequent symbol. If two + * leaves have the same frequency, they are sorted by symbol. */ + qsort(leaves, num_used_symbols, sizeof(leaves[0]), cmp_nodes_by_freq); + + cur_leaf = &leaves[0]; + cur_inode = &inodes[0]; + next_inode = &inodes[0]; + + /* The following loop takes the two lowest frequency nodes of those + * remaining and makes them the children of the next available + * intermediate node. It continues until all the leaf nodes and + * intermediate nodes have been used up, or the maximum allowed length + * for the codewords is exceeded. For the latter case, we must adjust + * the frequencies to be more equal and then execute this loop again. */ + while (1) { + + /* Lowest frequency node. */ + HuffmanNode *f1; + + /* Second lowest frequency node. */ + HuffmanNode *f2; + + /* Get the lowest and second lowest frequency nodes from the + * remaining leaves or from the intermediate nodes. */ + + if (cur_leaf != end_leaf && (cur_inode == next_inode || + cur_leaf->freq <= cur_inode->node_base.freq)) { + f1 = cur_leaf++; + } else if (cur_inode != next_inode) { + f1 = (HuffmanNode*)cur_inode++; + } + + if (cur_leaf != end_leaf && (cur_inode == next_inode || + cur_leaf->freq <= cur_inode->node_base.freq)) { + f2 = cur_leaf++; + } else if (cur_inode != next_inode) { + f2 = (HuffmanNode*)cur_inode++; + } else { + /* All nodes used up! */ + break; + } + + /* next_inode becomes the parent of f1 and f2. */ + + next_inode->node_base.freq = f1->freq + f2->freq; + next_inode->node_base.sym = INVALID_SYMBOL; + next_inode->left_child = f1; + next_inode->right_child = f2; + + /* We need to keep track of the height so that we can detect if + * the length of a codeword has execeed max_codeword_len. The + * parent node has a height one higher than the maximum height + * of its children. */ + next_inode->node_base.height = max(f1->height, f2->height) + 1; + + /* Check to see if the code length of the leaf farthest away + * from next_inode has exceeded the maximum code length. */ + if (next_inode->node_base.height > max_codeword_len) { + /* The code lengths can be made more uniform by making + * the frequencies more uniform. Divide all the + * frequencies by 2, leaving 1 as the minimum frequency. + * If this keeps happening, the symbol frequencies will + * approach equality, which makes their Huffman + * codewords approach the length + * log_2(num_used_symbols). + * */ + for (unsigned i = 0; i < num_used_symbols; i++) + leaves[i].freq = (leaves[i].freq + 1) >> 1; + + goto try_building_tree_again; + } + next_inode++; + } + + /* The Huffman tree is now complete, and its height is no more than + * max_codeword_len. */ + + HuffmanIntermediateNode *root = next_inode - 1; + wimlib_assert(root->node_base.height <= max_codeword_len); + + /* Compute the path lengths for the leaf nodes. */ + huffman_tree_compute_path_lengths(&root->node_base, 0); + + /* Sort the leaf nodes primarily by code length and secondarily by + * symbol. */ + qsort(leaves, num_used_symbols, sizeof(leaves[0]), cmp_nodes_by_code_len); + + u16 cur_codeword = 0; + unsigned cur_codeword_len = 0; + for (unsigned i = 0; i < num_used_symbols; i++) { + + /* Each time a codeword becomes one longer, the current codeword + * is left shifted by one place. This is part of the procedure + * for enumerating the canonical Huffman code. Additionally, + * whenever a codeword is used, 1 is added to the current + * codeword. */ + + unsigned len_diff = leaves[i].path_len - cur_codeword_len; + cur_codeword <<= len_diff; + cur_codeword_len += len_diff; + + u16 sym = leaves[i].sym; + codewords[sym] = cur_codeword; + lens[sym] = cur_codeword_len; + + cur_codeword++; + } +} diff --git a/src/compress_parallel.c b/src/compress_parallel.c index 25403423..d0ce5266 100644 --- a/src/compress_parallel.c +++ b/src/compress_parallel.c @@ -27,7 +27,7 @@ #endif #include "wimlib/assert.h" -#include "wimlib/compress_chunks.h" +#include "wimlib/chunk_compressor.h" #include "wimlib/error.h" #include "wimlib/list.h" #include "wimlib/util.h" @@ -54,9 +54,7 @@ struct compressor_thread_data { pthread_t thread; struct message_queue *chunks_to_compress_queue; struct message_queue *compressed_chunks_queue; - int out_ctype; - u32 out_chunk_size; - struct wimlib_lzx_context *comp_ctx; + struct wimlib_compressor *compressor; }; #define MAX_CHUNKS_PER_MSG 2 @@ -251,17 +249,17 @@ allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size) } static void -compress_chunks(struct message *msg, int out_ctype, - struct wimlib_lzx_context *comp_ctx) +compress_chunks(struct message *msg, struct wimlib_compressor *compressor) { for (size_t i = 0; i < msg->num_filled_chunks; i++) { + wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0); msg->compressed_chunk_sizes[i] = - compress_chunk(msg->uncompressed_chunks[i], - msg->uncompressed_chunk_sizes[i], - msg->compressed_chunks[i], - out_ctype, - comp_ctx); + wimlib_compress(msg->uncompressed_chunks[i], + msg->uncompressed_chunk_sizes[i], + msg->compressed_chunks[i], + msg->uncompressed_chunk_sizes[i] - 1, + compressor); } } @@ -272,7 +270,7 @@ compressor_thread_proc(void *arg) struct message *msg; while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) { - compress_chunks(msg, params->out_ctype, params->comp_ctx); + compress_chunks(msg, params->compressor); message_queue_put(params->compressed_chunks_queue, msg); } return NULL; @@ -298,10 +296,9 @@ parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx) message_queue_destroy(&ctx->chunks_to_compress_queue); message_queue_destroy(&ctx->compressed_chunks_queue); - if (ctx->base.out_ctype == WIMLIB_COMPRESSION_TYPE_LZX && - ctx->thread_data != NULL) + if (ctx->thread_data != NULL) for (i = 0; i < ctx->num_threads; i++) - wimlib_lzx_free_context(ctx->thread_data[i].comp_ctx); + wimlib_free_compressor(ctx->thread_data[i].compressor); FREE(ctx->thread_data); @@ -493,14 +490,10 @@ new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size, dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue; dat->compressed_chunks_queue = &ctx->compressed_chunks_queue; - dat->out_ctype = out_ctype; - dat->out_chunk_size = out_chunk_size; - if (out_ctype == WIMLIB_COMPRESSION_TYPE_LZX) { - ret = wimlib_lzx_alloc_context(out_chunk_size, NULL, - &dat->comp_ctx); - if (ret) - goto err; - } + ret = wimlib_create_compressor(out_ctype, out_chunk_size, + NULL, &dat->compressor); + if (ret) + goto err; } for (ctx->num_started_threads = 0; diff --git a/src/compress_serial.c b/src/compress_serial.c index b3c6293e..0ae35483 100644 --- a/src/compress_serial.c +++ b/src/compress_serial.c @@ -28,14 +28,14 @@ #include "wimlib.h" #include "wimlib/assert.h" -#include "wimlib/compress_chunks.h" +#include "wimlib/chunk_compressor.h" #include "wimlib/util.h" #include struct serial_chunk_compressor { struct chunk_compressor base; - struct wimlib_lzx_context *comp_ctx; + struct wimlib_compressor *compressor; u8 *udata; u8 *cdata; unsigned ulen; @@ -50,6 +50,7 @@ serial_chunk_compressor_destroy(struct chunk_compressor *_ctx) if (ctx == NULL) return; + wimlib_free_compressor(ctx->compressor); FREE(ctx->udata); FREE(ctx->cdata); FREE(ctx); @@ -82,9 +83,9 @@ serial_chunk_compressor_get_chunk(struct chunk_compressor *_ctx, if (ctx->ulen == 0) return false; - ctx->clen = compress_chunk(ctx->udata, ctx->ulen, - ctx->cdata, ctx->base.out_ctype, - ctx->comp_ctx); + ctx->clen = wimlib_compress(ctx->udata, ctx->ulen, + ctx->cdata, ctx->ulen - 1, + ctx->compressor); if (ctx->clen) { *cdata_ret = ctx->cdata; @@ -101,14 +102,14 @@ serial_chunk_compressor_get_chunk(struct chunk_compressor *_ctx, int new_serial_chunk_compressor(int out_ctype, u32 out_chunk_size, - struct wimlib_lzx_context *comp_ctx, struct chunk_compressor **compressor_ret) { struct serial_chunk_compressor *ctx; + int ret; ctx = CALLOC(1, sizeof(*ctx)); if (ctx == NULL) - goto err; + return WIMLIB_ERR_NOMEM; ctx->base.out_ctype = out_ctype; ctx->base.out_chunk_size = out_chunk_size; @@ -117,17 +118,24 @@ new_serial_chunk_compressor(int out_ctype, u32 out_chunk_size, ctx->base.submit_chunk = serial_chunk_compressor_submit_chunk; ctx->base.get_chunk = serial_chunk_compressor_get_chunk; - ctx->comp_ctx = comp_ctx; + + ret = wimlib_create_compressor(out_ctype, out_chunk_size, + NULL, &ctx->compressor); + if (ret) + goto err; + ctx->udata = MALLOC(out_chunk_size); ctx->cdata = MALLOC(out_chunk_size - 1); ctx->ulen = 0; - if (ctx->udata == NULL || ctx->cdata == NULL) + if (ctx->udata == NULL || ctx->cdata == NULL) { + ret = WIMLIB_ERR_NOMEM; goto err; + } *compressor_ret = &ctx->base; return 0; err: serial_chunk_compressor_destroy(&ctx->base); - return WIMLIB_ERR_NOMEM; + return ret; } diff --git a/src/decompress.c b/src/decompress.c index c94696dd..4ea066f0 100644 --- a/src/decompress.c +++ b/src/decompress.c @@ -1,11 +1,12 @@ /* * decompress.c * - * Functions used for decompression. + * Generic functions for decompression, wrapping around actual decompression + * implementations. */ /* - * Copyright (C) 2012, 2013 Eric Biggers + * Copyright (C) 2013 Eric Biggers * * This file is part of wimlib, a library for working with WIM files. * @@ -27,406 +28,121 @@ # include "config.h" #endif -#include "wimlib/decompress.h" -#include "wimlib/error.h" +#include "wimlib.h" +#include "wimlib/decompressor_ops.h" #include "wimlib/util.h" -#include - -#ifdef __GNUC__ -# ifdef __SSE2__ -# define USE_SSE2_FILL -# include -# else -# define USE_LONG_FILL -# endif -#endif - -/* - * make_huffman_decode_table: - Builds a fast huffman decoding table from an - * array that gives the length of the codeword for each symbol in the alphabet. - * Originally based on code written by David Tritscher (taken the original LZX - * decompression code); also heavily modified to add some optimizations used in - * the zlib code, as well as more comments; also added some optimizations to - * make filling in the decode table entries faster (may not help significantly - * though). - * - * @decode_table: The array in which to create the fast huffman decoding - * table. It must have a length of at least - * (2**table_bits) + 2 * num_syms to guarantee - * that there is enough space. Also must be 16-byte - * aligned (at least when USE_SSE2_FILL gets defined). - * - * @num_syms: Number of symbols in the alphabet, including symbols - * that do not appear in this particular input chunk. - * - * @table_bits: Any symbols with a code length of table_bits or less can - * be decoded in one lookup of the table. 2**table_bits - * must be greater than or equal to @num_syms if there are - * any Huffman codes longer than @table_bits. - * - * @lens: An array of length @num_syms, indexable by symbol, that - * gives the length of the Huffman codeword for that - * symbol. Because the Huffman tree is in canonical form, - * it can be reconstructed by only knowing the length of - * the codeword for each symbol. It is assumed, but not - * checked, that every length is less than - * @max_codeword_len. - * - * @max_codeword_len: The longest codeword length allowed in the compression - * format. - * - * Returns 0 on success; returns -1 if the length values do not correspond to a - * valid Huffman tree. - * - * The format of the Huffamn decoding table is as follows. The first (1 << - * table_bits) entries of the table are indexed by chunks of the input of size - * @table_bits. If the next Huffman codeword in the input happens to have a - * length of exactly @table_bits, the symbol is simply read directly from the - * decoding table. Alternatively, if the next Huffman codeword has length _less - * than_ @table_bits, the symbol is also read directly from the decode table; - * this is possible because every entry in the table that is indexed by an - * integer that has the shorter codeword as a binary prefix is filled in with - * the appropriate symbol. If a codeword has length n <= table_bits, it will - * have 2**(table_bits - n) possible suffixes, and thus that many entries in the - * decoding table. - * - * It's a bit more complicated if the next Huffman codeword has length of more - * than @table_bits. The table entry indexed by the first @table_bits of that - * codeword cannot give the appropriate symbol directly, because that entry is - * guaranteed to be referenced by the Huffman codewords of multiple symbols. - * And while the LZX compression format does not allow codes longer than 16 - * bits, a table of size (2 ** 16) = 65536 entries would be too slow to create. - * - * There are several different ways to make it possible to look up the symbols - * for codewords longer than @table_bits. One way is to make the entries for - * the prefixes of length @table_bits of those entries be pointers to additional - * decoding tables that are indexed by some number of additional bits of the - * codeword. The technique used here is a bit simpler, however: just store the - * needed subtrees of the Huffman tree in the decoding table after the lookup - * entries, beginning at index (2**table_bits). Real pointers are replaced by - * indices into the decoding table, and symbol entries are distinguished from - * pointers by the fact that values less than @num_syms must be symbol values. - */ -int -make_huffman_decode_table(u16 *decode_table, unsigned num_syms, - unsigned table_bits, const u8 *lens, - unsigned max_codeword_len) +struct wimlib_decompressor { + const struct decompressor_ops *ops; + void *private; +}; + +static const struct decompressor_ops *decompressor_ops[] = { + [WIMLIB_COMPRESSION_TYPE_LZX] = &lzx_decompressor_ops, + [WIMLIB_COMPRESSION_TYPE_XPRESS] = &xpress_decompressor_ops, + [WIMLIB_COMPRESSION_TYPE_LZMS] = &lzms_decompressor_ops, +}; + +static struct wimlib_decompressor_params_header * +decompressor_default_params[ARRAY_LEN(decompressor_ops)] = { + [WIMLIB_COMPRESSION_TYPE_LZX] = NULL, + [WIMLIB_COMPRESSION_TYPE_XPRESS] = NULL, + [WIMLIB_COMPRESSION_TYPE_LZMS] = NULL, +}; + +static bool +decompressor_ctype_valid(int ctype) { - unsigned len_counts[max_codeword_len + 1]; - u16 sorted_syms[num_syms]; - unsigned offsets[max_codeword_len + 1]; - const unsigned table_num_entries = 1 << table_bits; - int left; - unsigned decode_table_pos; - void *decode_table_ptr; - unsigned sym_idx; - unsigned codeword_len; - unsigned stores_per_loop; - -#ifdef USE_LONG_FILL - const unsigned entries_per_long = sizeof(unsigned long) / sizeof(decode_table[0]); -#endif - -#ifdef USE_SSE2_FILL - const unsigned entries_per_xmm = sizeof(__m128i) / sizeof(decode_table[0]); -#endif - - wimlib_assert2((uintptr_t)decode_table % DECODE_TABLE_ALIGNMENT == 0); - - /* accumulate lengths for codes */ - for (unsigned i = 0; i <= max_codeword_len; i++) - len_counts[i] = 0; - - for (unsigned sym = 0; sym < num_syms; sym++) { - wimlib_assert2(lens[sym] <= max_codeword_len); - len_counts[lens[sym]]++; - } - - /* check for an over-subscribed or incomplete set of lengths */ - left = 1; - for (unsigned len = 1; len <= max_codeword_len; len++) { - left <<= 1; - left -= len_counts[len]; - if (unlikely(left < 0)) { /* over-subscribed */ - DEBUG("Invalid Huffman code (over-subscribed)"); - return -1; - } - } - - if (unlikely(left != 0)) /* incomplete set */{ - if (left == 1 << max_codeword_len) { - /* Empty code--- okay in XPRESS and LZX */ - memset(decode_table, 0, - table_num_entries * sizeof(decode_table[0])); - return 0; - } else { - DEBUG("Invalid Huffman code (incomplete set)"); - return -1; - } - } - - /* Generate offsets into symbol table for each length for sorting */ - offsets[1] = 0; - for (unsigned len = 1; len < max_codeword_len; len++) - offsets[len + 1] = offsets[len] + len_counts[len]; - - /* Sort symbols primarily by length and secondarily by symbol order. - * This is basically a count-sort over the codeword lengths. */ - for (unsigned sym = 0; sym < num_syms; sym++) - if (lens[sym] != 0) - sorted_syms[offsets[lens[sym]]++] = sym; - - /* Fill entries for codewords short enough for a direct mapping. We can - * take advantage of the ordering of the codewords, since the Huffman - * code is canonical. It must be the case that all the codewords of - * some length L numerically precede all the codewords of length L + 1. - * Furthermore, if we have 2 symbols A and B with the same codeword - * length but symbol A is sorted before symbol B, then then we know that - * the codeword for A numerically precedes the codeword for B. */ - decode_table_ptr = decode_table; - sym_idx = 0; - codeword_len = 1; -#ifdef USE_SSE2_FILL - /* Fill in the Huffman decode table entries one 128-bit vector at a - * time. This is 8 entries per store. */ - stores_per_loop = (1 << (table_bits - codeword_len)) / entries_per_xmm; - for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) { - unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; - for (; sym_idx < end_sym_idx; sym_idx++) { - /* Note: unlike in the 'long' version below, the __m128i - * type already has __attribute__((may_alias)), so using - * it to access the decode table, which is an array of - * unsigned shorts, will not violate strict aliasing. */ - u16 sym; - __m128i v; - __m128i *p; - unsigned n; - - sym = sorted_syms[sym_idx]; - - v = _mm_set1_epi16(sym); - p = (__m128i*)decode_table_ptr; - n = stores_per_loop; - do { - *p++ = v; - } while (--n); - decode_table_ptr = p; - } - } -#endif /* USE_SSE2_FILL */ - -#ifdef USE_LONG_FILL - /* Fill in the Huffman decode table entries one 'unsigned long' at a - * time. On 32-bit systems this is 2 entries per store, while on 64-bit - * systems this is 4 entries per store. */ - stores_per_loop = (1 << (table_bits - codeword_len)) / entries_per_long; - for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) { - unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; - for (; sym_idx < end_sym_idx; sym_idx++) { - - /* Accessing the array of unsigned shorts as unsigned - * longs would violate strict aliasing and would require - * compiling the code with -fno-strict-aliasing to - * guarantee correctness. To work around this problem, - * use the gcc 'may_alias' extension to define a special - * unsigned long type that may alias any other in-memory - * variable. */ - typedef unsigned long __attribute__((may_alias)) aliased_long_t; - - u16 sym; - aliased_long_t *p; - aliased_long_t v; - unsigned n; - - sym = sorted_syms[sym_idx]; - - BUILD_BUG_ON(sizeof(aliased_long_t) != 4 && - sizeof(aliased_long_t) != 8); + return (ctype >= 0 && + ctype < ARRAY_LEN(decompressor_ops) && + decompressor_ops[ctype] != NULL); +} - v = sym; - if (sizeof(aliased_long_t) >= 4) - v |= v << 16; - if (sizeof(aliased_long_t) >= 8) { - /* This may produce a compiler warning if an - * aliased_long_t is 32 bits, but this won't be - * executed unless an aliased_long_t is at least - * 64 bits anyway. */ - v |= v << 32; - } +WIMLIBAPI int +wimlib_set_default_decompressor_params(enum wimlib_compression_type ctype, + const struct wimlib_decompressor_params_header *params) +{ + struct wimlib_decompressor_params_header *dup; - p = (aliased_long_t *)decode_table_ptr; - n = stores_per_loop; + if (!decompressor_ctype_valid(ctype)) + return WIMLIB_ERR_INVALID_COMPRESSION_TYPE; - do { - *p++ = v; - } while (--n); - decode_table_ptr = p; - } + dup = NULL; + if (params) { + dup = memdup(params, params->size); + if (dup == NULL) + return WIMLIB_ERR_NOMEM; } -#endif /* USE_LONG_FILL */ - - /* Fill in the Huffman decode table entries one 16-bit integer at a - * time. */ - stores_per_loop = (1 << (table_bits - codeword_len)); - for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) { - unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; - for (; sym_idx < end_sym_idx; sym_idx++) { - u16 sym; - u16 *p; - unsigned n; - - sym = sorted_syms[sym_idx]; - p = (u16*)decode_table_ptr; - n = stores_per_loop; - - do { - *p++ = sym; - } while (--n); + FREE(decompressor_default_params[ctype]); + decompressor_default_params[ctype] = dup; + return 0; +} - decode_table_ptr = p; - } +void +cleanup_decompressor_params(void) +{ + for (size_t i = 0; i < ARRAY_LEN(decompressor_default_params); i++) { + FREE(decompressor_default_params[i]); + decompressor_default_params[i] = NULL; } +} - /* If we've filled in the entire table, we are done. Otherwise, there - * are codes longer than table bits that we need to store in the - * tree-like structure at the end of the table rather than directly in - * the main decode table itself. */ - - decode_table_pos = (u16*)decode_table_ptr - decode_table; - if (decode_table_pos != table_num_entries) { - unsigned j; - unsigned next_free_tree_slot; - unsigned cur_codeword; - - wimlib_assert2(decode_table_pos < table_num_entries); - - /* Fill in the remaining entries, which correspond to codes - * longer than @table_bits. - * - * First, zero out the rest of the entries. This is necessary - * so that the entries appear as "unallocated" in the next part. - * */ - j = decode_table_pos; - do { - decode_table[j] = 0; - } while (++j != table_num_entries); - - /* Assert that 2**table_bits is at least num_syms. If this - * wasn't the case, we wouldn't be able to distinguish pointer - * entries from symbol entries. */ - wimlib_assert2(table_num_entries >= num_syms); - - - /* The tree nodes are allocated starting at decode_table[1 << - * table_bits]. Remember that the full size of the table, - * including the extra space for the tree nodes, is actually - * 2**table_bits + 2 * num_syms slots, while table_num_entries - * is only 2**table_bits. */ - next_free_tree_slot = table_num_entries; - - /* The current Huffman codeword */ - cur_codeword = decode_table_pos << 1; - - /* Go through every codeword of length greater than @table_bits, - * primarily in order of codeword length and secondarily in - * order of symbol. */ - wimlib_assert2(codeword_len == table_bits + 1); - for (; codeword_len <= max_codeword_len; codeword_len++, cur_codeword <<= 1) - { - unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; - for (; sym_idx < end_sym_idx; sym_idx++, cur_codeword++) { - unsigned sym = sorted_syms[sym_idx]; - unsigned extra_bits = codeword_len - table_bits; - - /* index of the current node; find it from the - * prefix of the current Huffman codeword. */ - unsigned node_idx = cur_codeword >> extra_bits; - wimlib_assert2(node_idx < table_num_entries); - - /* Go through each bit of the current Huffman - * codeword beyond the prefix of length - * @table_bits and walk the tree, allocating any - * slots that have not yet been allocated. */ - do { - - /* If the current tree node points to - * nowhere but we need to follow it, - * allocate a new node for it to point - * to. */ - if (decode_table[node_idx] == 0) { - decode_table[node_idx] = next_free_tree_slot; - decode_table[next_free_tree_slot++] = 0; - decode_table[next_free_tree_slot++] = 0; - wimlib_assert2(next_free_tree_slot <= - table_num_entries + 2 * num_syms); - } - - /* Set node_idx to left child */ - node_idx = decode_table[node_idx]; - - /* Is the next bit 0 or 1? If 0, go left - * (already done). If 1, go right by - * incrementing node_idx. */ - --extra_bits; - node_idx += (cur_codeword >> extra_bits) & 1; - } while (extra_bits != 0); - - /* node_idx is now the index of the leaf entry - * into which the actual symbol will go. */ - decode_table[node_idx] = sym; - - /* Note: cur_codeword is always incremented at - * the end of this loop because this is how - * canonical Huffman codes are generated (add 1 - * for each code, then left shift whenever the - * code length increases) */ - } +WIMLIBAPI int +wimlib_create_decompressor(enum wimlib_compression_type ctype, + size_t max_block_size, + const struct wimlib_decompressor_params_header *extra_params, + struct wimlib_decompressor **dec_ret) +{ + struct wimlib_decompressor *dec; + + if (dec_ret == NULL) + return WIMLIB_ERR_INVALID_PARAM; + + if (!decompressor_ctype_valid(ctype)) + return WIMLIB_ERR_INVALID_COMPRESSION_TYPE; + + dec = MALLOC(sizeof(*dec)); + if (dec == NULL) + return WIMLIB_ERR_NOMEM; + dec->ops = decompressor_ops[ctype]; + dec->private = NULL; + if (dec->ops->create_decompressor) { + const struct wimlib_decompressor_params_header *params; + int ret; + + if (extra_params) + params = extra_params; + else + params = decompressor_default_params[ctype]; + ret = dec->ops->create_decompressor(max_block_size, + params, + &dec->private); + if (ret) { + FREE(dec); + return ret; } } + *dec_ret = dec; return 0; } -/* Reads a Huffman-encoded symbol from the bistream when the number of remaining - * bits is less than the maximum codeword length. */ -int -read_huffsym_near_end_of_input(struct input_bitstream *istream, - const u16 decode_table[], - const u8 lens[], - unsigned num_syms, - unsigned table_bits, - unsigned *n) +WIMLIBAPI int +wimlib_decompress(const void *compressed_data, size_t compressed_size, + void *uncompressed_data, size_t uncompressed_size, + struct wimlib_decompressor *dec) { - unsigned bitsleft = istream->bitsleft; - unsigned key_size; - u16 sym; - u16 key_bits; - - if (table_bits > bitsleft) { - key_size = bitsleft; - bitsleft = 0; - key_bits = bitstream_peek_bits(istream, key_size) << - (table_bits - key_size); - } else { - key_size = table_bits; - bitsleft -= table_bits; - key_bits = bitstream_peek_bits(istream, table_bits); - } + return dec->ops->decompress(compressed_data, compressed_size, + uncompressed_data, uncompressed_size, + dec->private); +} - sym = decode_table[key_bits]; - if (sym >= num_syms) { - bitstream_remove_bits(istream, key_size); - do { - if (bitsleft == 0) - return -1; - key_bits = sym + bitstream_peek_bits(istream, 1); - bitstream_remove_bits(istream, 1); - bitsleft--; - } while ((sym = decode_table[key_bits]) >= num_syms); - } else { - bitstream_remove_bits(istream, lens[sym]); +WIMLIBAPI void +wimlib_free_decompressor(struct wimlib_decompressor *dec) +{ + if (dec) { + if (dec->ops->free_decompressor) + dec->ops->free_decompressor(dec->private); + FREE(dec); } - *n = sym; - return 0; } diff --git a/src/decompress_common.c b/src/decompress_common.c new file mode 100644 index 00000000..767527d9 --- /dev/null +++ b/src/decompress_common.c @@ -0,0 +1,432 @@ +/* + * decompress_common.c + * + * Code for decompression shared among multiple compression formats. + */ + +/* + * Copyright (C) 2012, 2013 Eric Biggers + * + * This file is part of wimlib, a library for working with WIM files. + * + * wimlib is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) + * any later version. + * + * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with wimlib; if not, see http://www.gnu.org/licenses/. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wimlib/decompress_common.h" +#include "wimlib/error.h" +#include "wimlib/util.h" + +#include + +#ifdef __GNUC__ +# ifdef __SSE2__ +# define USE_SSE2_FILL +# include +# else +# define USE_LONG_FILL +# endif +#endif + +/* + * make_huffman_decode_table: - Builds a fast huffman decoding table from an + * array that gives the length of the codeword for each symbol in the alphabet. + * Originally based on code written by David Tritscher (taken the original LZX + * decompression code); also heavily modified to add some optimizations used in + * the zlib code, as well as more comments; also added some optimizations to + * make filling in the decode table entries faster (may not help significantly + * though). + * + * @decode_table: The array in which to create the fast huffman decoding + * table. It must have a length of at least + * (2**table_bits) + 2 * num_syms to guarantee + * that there is enough space. Also must be 16-byte + * aligned (at least when USE_SSE2_FILL gets defined). + * + * @num_syms: Number of symbols in the alphabet, including symbols + * that do not appear in this particular input chunk. + * + * @table_bits: Any symbols with a code length of table_bits or less can + * be decoded in one lookup of the table. 2**table_bits + * must be greater than or equal to @num_syms if there are + * any Huffman codes longer than @table_bits. + * + * @lens: An array of length @num_syms, indexable by symbol, that + * gives the length of the Huffman codeword for that + * symbol. Because the Huffman tree is in canonical form, + * it can be reconstructed by only knowing the length of + * the codeword for each symbol. It is assumed, but not + * checked, that every length is less than + * @max_codeword_len. + * + * @max_codeword_len: The longest codeword length allowed in the compression + * format. + * + * Returns 0 on success; returns -1 if the length values do not correspond to a + * valid Huffman tree. + * + * The format of the Huffamn decoding table is as follows. The first (1 << + * table_bits) entries of the table are indexed by chunks of the input of size + * @table_bits. If the next Huffman codeword in the input happens to have a + * length of exactly @table_bits, the symbol is simply read directly from the + * decoding table. Alternatively, if the next Huffman codeword has length _less + * than_ @table_bits, the symbol is also read directly from the decode table; + * this is possible because every entry in the table that is indexed by an + * integer that has the shorter codeword as a binary prefix is filled in with + * the appropriate symbol. If a codeword has length n <= table_bits, it will + * have 2**(table_bits - n) possible suffixes, and thus that many entries in the + * decoding table. + * + * It's a bit more complicated if the next Huffman codeword has length of more + * than @table_bits. The table entry indexed by the first @table_bits of that + * codeword cannot give the appropriate symbol directly, because that entry is + * guaranteed to be referenced by the Huffman codewords of multiple symbols. + * And while the LZX compression format does not allow codes longer than 16 + * bits, a table of size (2 ** 16) = 65536 entries would be too slow to create. + * + * There are several different ways to make it possible to look up the symbols + * for codewords longer than @table_bits. One way is to make the entries for + * the prefixes of length @table_bits of those entries be pointers to additional + * decoding tables that are indexed by some number of additional bits of the + * codeword. The technique used here is a bit simpler, however: just store the + * needed subtrees of the Huffman tree in the decoding table after the lookup + * entries, beginning at index (2**table_bits). Real pointers are replaced by + * indices into the decoding table, and symbol entries are distinguished from + * pointers by the fact that values less than @num_syms must be symbol values. + */ +int +make_huffman_decode_table(u16 *decode_table, unsigned num_syms, + unsigned table_bits, const u8 *lens, + unsigned max_codeword_len) +{ + unsigned len_counts[max_codeword_len + 1]; + u16 sorted_syms[num_syms]; + unsigned offsets[max_codeword_len + 1]; + const unsigned table_num_entries = 1 << table_bits; + int left; + unsigned decode_table_pos; + void *decode_table_ptr; + unsigned sym_idx; + unsigned codeword_len; + unsigned stores_per_loop; + +#ifdef USE_LONG_FILL + const unsigned entries_per_long = sizeof(unsigned long) / sizeof(decode_table[0]); +#endif + +#ifdef USE_SSE2_FILL + const unsigned entries_per_xmm = sizeof(__m128i) / sizeof(decode_table[0]); +#endif + + wimlib_assert2((uintptr_t)decode_table % DECODE_TABLE_ALIGNMENT == 0); + + /* accumulate lengths for codes */ + for (unsigned i = 0; i <= max_codeword_len; i++) + len_counts[i] = 0; + + for (unsigned sym = 0; sym < num_syms; sym++) { + wimlib_assert2(lens[sym] <= max_codeword_len); + len_counts[lens[sym]]++; + } + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (unsigned len = 1; len <= max_codeword_len; len++) { + left <<= 1; + left -= len_counts[len]; + if (unlikely(left < 0)) { /* over-subscribed */ + DEBUG("Invalid Huffman code (over-subscribed)"); + return -1; + } + } + + if (unlikely(left != 0)) /* incomplete set */{ + if (left == 1 << max_codeword_len) { + /* Empty code--- okay in XPRESS and LZX */ + memset(decode_table, 0, + table_num_entries * sizeof(decode_table[0])); + return 0; + } else { + DEBUG("Invalid Huffman code (incomplete set)"); + return -1; + } + } + + /* Generate offsets into symbol table for each length for sorting */ + offsets[1] = 0; + for (unsigned len = 1; len < max_codeword_len; len++) + offsets[len + 1] = offsets[len] + len_counts[len]; + + /* Sort symbols primarily by length and secondarily by symbol order. + * This is basically a count-sort over the codeword lengths. */ + for (unsigned sym = 0; sym < num_syms; sym++) + if (lens[sym] != 0) + sorted_syms[offsets[lens[sym]]++] = sym; + + /* Fill entries for codewords short enough for a direct mapping. We can + * take advantage of the ordering of the codewords, since the Huffman + * code is canonical. It must be the case that all the codewords of + * some length L numerically precede all the codewords of length L + 1. + * Furthermore, if we have 2 symbols A and B with the same codeword + * length but symbol A is sorted before symbol B, then then we know that + * the codeword for A numerically precedes the codeword for B. */ + decode_table_ptr = decode_table; + sym_idx = 0; + codeword_len = 1; +#ifdef USE_SSE2_FILL + /* Fill in the Huffman decode table entries one 128-bit vector at a + * time. This is 8 entries per store. */ + stores_per_loop = (1 << (table_bits - codeword_len)) / entries_per_xmm; + for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) { + unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; + for (; sym_idx < end_sym_idx; sym_idx++) { + /* Note: unlike in the 'long' version below, the __m128i + * type already has __attribute__((may_alias)), so using + * it to access the decode table, which is an array of + * unsigned shorts, will not violate strict aliasing. */ + u16 sym; + __m128i v; + __m128i *p; + unsigned n; + + sym = sorted_syms[sym_idx]; + + v = _mm_set1_epi16(sym); + p = (__m128i*)decode_table_ptr; + n = stores_per_loop; + do { + *p++ = v; + } while (--n); + decode_table_ptr = p; + } + } +#endif /* USE_SSE2_FILL */ + +#ifdef USE_LONG_FILL + /* Fill in the Huffman decode table entries one 'unsigned long' at a + * time. On 32-bit systems this is 2 entries per store, while on 64-bit + * systems this is 4 entries per store. */ + stores_per_loop = (1 << (table_bits - codeword_len)) / entries_per_long; + for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) { + unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; + for (; sym_idx < end_sym_idx; sym_idx++) { + + /* Accessing the array of unsigned shorts as unsigned + * longs would violate strict aliasing and would require + * compiling the code with -fno-strict-aliasing to + * guarantee correctness. To work around this problem, + * use the gcc 'may_alias' extension to define a special + * unsigned long type that may alias any other in-memory + * variable. */ + typedef unsigned long __attribute__((may_alias)) aliased_long_t; + + u16 sym; + aliased_long_t *p; + aliased_long_t v; + unsigned n; + + sym = sorted_syms[sym_idx]; + + BUILD_BUG_ON(sizeof(aliased_long_t) != 4 && + sizeof(aliased_long_t) != 8); + + v = sym; + if (sizeof(aliased_long_t) >= 4) + v |= v << 16; + if (sizeof(aliased_long_t) >= 8) { + /* This may produce a compiler warning if an + * aliased_long_t is 32 bits, but this won't be + * executed unless an aliased_long_t is at least + * 64 bits anyway. */ + v |= v << 32; + } + + p = (aliased_long_t *)decode_table_ptr; + n = stores_per_loop; + + do { + *p++ = v; + } while (--n); + decode_table_ptr = p; + } + } +#endif /* USE_LONG_FILL */ + + /* Fill in the Huffman decode table entries one 16-bit integer at a + * time. */ + stores_per_loop = (1 << (table_bits - codeword_len)); + for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) { + unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; + for (; sym_idx < end_sym_idx; sym_idx++) { + u16 sym; + u16 *p; + unsigned n; + + sym = sorted_syms[sym_idx]; + + p = (u16*)decode_table_ptr; + n = stores_per_loop; + + do { + *p++ = sym; + } while (--n); + + decode_table_ptr = p; + } + } + + /* If we've filled in the entire table, we are done. Otherwise, there + * are codes longer than table bits that we need to store in the + * tree-like structure at the end of the table rather than directly in + * the main decode table itself. */ + + decode_table_pos = (u16*)decode_table_ptr - decode_table; + if (decode_table_pos != table_num_entries) { + unsigned j; + unsigned next_free_tree_slot; + unsigned cur_codeword; + + wimlib_assert2(decode_table_pos < table_num_entries); + + /* Fill in the remaining entries, which correspond to codes + * longer than @table_bits. + * + * First, zero out the rest of the entries. This is necessary + * so that the entries appear as "unallocated" in the next part. + * */ + j = decode_table_pos; + do { + decode_table[j] = 0; + } while (++j != table_num_entries); + + /* Assert that 2**table_bits is at least num_syms. If this + * wasn't the case, we wouldn't be able to distinguish pointer + * entries from symbol entries. */ + wimlib_assert2(table_num_entries >= num_syms); + + + /* The tree nodes are allocated starting at decode_table[1 << + * table_bits]. Remember that the full size of the table, + * including the extra space for the tree nodes, is actually + * 2**table_bits + 2 * num_syms slots, while table_num_entries + * is only 2**table_bits. */ + next_free_tree_slot = table_num_entries; + + /* The current Huffman codeword */ + cur_codeword = decode_table_pos << 1; + + /* Go through every codeword of length greater than @table_bits, + * primarily in order of codeword length and secondarily in + * order of symbol. */ + wimlib_assert2(codeword_len == table_bits + 1); + for (; codeword_len <= max_codeword_len; codeword_len++, cur_codeword <<= 1) + { + unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; + for (; sym_idx < end_sym_idx; sym_idx++, cur_codeword++) { + unsigned sym = sorted_syms[sym_idx]; + unsigned extra_bits = codeword_len - table_bits; + + /* index of the current node; find it from the + * prefix of the current Huffman codeword. */ + unsigned node_idx = cur_codeword >> extra_bits; + wimlib_assert2(node_idx < table_num_entries); + + /* Go through each bit of the current Huffman + * codeword beyond the prefix of length + * @table_bits and walk the tree, allocating any + * slots that have not yet been allocated. */ + do { + + /* If the current tree node points to + * nowhere but we need to follow it, + * allocate a new node for it to point + * to. */ + if (decode_table[node_idx] == 0) { + decode_table[node_idx] = next_free_tree_slot; + decode_table[next_free_tree_slot++] = 0; + decode_table[next_free_tree_slot++] = 0; + wimlib_assert2(next_free_tree_slot <= + table_num_entries + 2 * num_syms); + } + + /* Set node_idx to left child */ + node_idx = decode_table[node_idx]; + + /* Is the next bit 0 or 1? If 0, go left + * (already done). If 1, go right by + * incrementing node_idx. */ + --extra_bits; + node_idx += (cur_codeword >> extra_bits) & 1; + } while (extra_bits != 0); + + /* node_idx is now the index of the leaf entry + * into which the actual symbol will go. */ + decode_table[node_idx] = sym; + + /* Note: cur_codeword is always incremented at + * the end of this loop because this is how + * canonical Huffman codes are generated (add 1 + * for each code, then left shift whenever the + * code length increases) */ + } + } + } + return 0; +} + +/* Reads a Huffman-encoded symbol from the bistream when the number of remaining + * bits is less than the maximum codeword length. */ +int +read_huffsym_near_end_of_input(struct input_bitstream *istream, + const u16 decode_table[], + const u8 lens[], + unsigned num_syms, + unsigned table_bits, + unsigned *n) +{ + unsigned bitsleft = istream->bitsleft; + unsigned key_size; + u16 sym; + u16 key_bits; + + if (table_bits > bitsleft) { + key_size = bitsleft; + bitsleft = 0; + key_bits = bitstream_peek_bits(istream, key_size) << + (table_bits - key_size); + } else { + key_size = table_bits; + bitsleft -= table_bits; + key_bits = bitstream_peek_bits(istream, table_bits); + } + + sym = decode_table[key_bits]; + if (sym >= num_syms) { + bitstream_remove_bits(istream, key_size); + do { + if (bitsleft == 0) + return -1; + key_bits = sym + bitstream_peek_bits(istream, 1); + bitstream_remove_bits(istream, 1); + bitsleft--; + } while ((sym = decode_table[key_bits]) >= num_syms); + } else { + bitstream_remove_bits(istream, lens[sym]); + } + *n = sym; + return 0; +} diff --git a/src/integrity.c b/src/integrity.c index 8e1df189..350b3a20 100644 --- a/src/integrity.c +++ b/src/integrity.c @@ -366,8 +366,7 @@ write_integrity_table(WIMStruct *wim, 0, &wim->hdr.integrity_table_reshdr, NULL, - 0, - &wim->lzx_context); + 0); FREE(new_table); out_free_old_table: FREE(old_table); diff --git a/src/lookup_table.c b/src/lookup_table.c index f54f0cc6..357d6438 100644 --- a/src/lookup_table.c +++ b/src/lookup_table.c @@ -323,7 +323,6 @@ for_lookup_table_entry(struct wim_lookup_table *table, hlist_for_each_entry_safe(lte, pos, tmp, &table->array[i], hash_list) { - wimlib_assert2(!(lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA)); ret = visitor(lte, arg); if (ret) return ret; @@ -869,8 +868,7 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, struct filedes *out_fd, u16 part_number, struct wim_reshdr *out_reshdr, - int write_resource_flags, - struct wimlib_lzx_context **comp_ctx) + int write_resource_flags) { size_t table_size; struct wim_lookup_table_entry *lte; @@ -950,8 +948,7 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, 0, out_reshdr, NULL, - write_resource_flags, - comp_ctx); + write_resource_flags); FREE(table_buf); DEBUG("ret=%d", ret); return ret; diff --git a/src/lz77.c b/src/lz77.c index 5729bd4c..b5495da7 100644 --- a/src/lz77.c +++ b/src/lz77.c @@ -30,7 +30,7 @@ # include #endif -#include "wimlib/compress.h" +#include "wimlib/compress_common.h" #include "wimlib/util.h" #include diff --git a/src/lzms-compress.c b/src/lzms-compress.c new file mode 100644 index 00000000..e5abf24a --- /dev/null +++ b/src/lzms-compress.c @@ -0,0 +1,129 @@ +/* + * lzms-compress.c + * + * A compressor for the LZMS compression format. + */ + +/* + * Copyright (C) 2013 Eric Biggers + * + * This file is part of wimlib, a library for working with WIM files. + * + * wimlib is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) + * any later version. + * + * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with wimlib; if not, see http://www.gnu.org/licenses/. + */ + +/* This a compressor for the LZMS compression format. More details about this + * format can be found in lzms-decompress.c. */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wimlib.h" +#include "wimlib/assert.h" +#include "wimlib/compressor_ops.h" +#include "wimlib/compress_common.h" +#include "wimlib/error.h" +#include "wimlib/lzms.h" +#include "wimlib/util.h" + +#include + +struct lzms_compressor { + u8 *window; + u32 window_size; + u32 max_block_size; + + s32 *last_target_usages; +}; + +static void +lzms_preprocess_data(u8 *data, s32 size, s32 *last_target_usages) +{ + for (s32 i = 0; i < size - 11; i++) { + } +} + +static size_t +lzms_compress(const void *uncompressed_data, size_t uncompressed_size, + void *compressed_data, size_t compressed_size_avail, void *_ctx) +{ + struct lzms_compressor *ctx = _ctx; + + if (uncompressed_size > ctx->max_block_size) { + LZMS_DEBUG("Can't compress %su bytes: LZMS context " + "only supports %u bytes", + uncompressed_size, ctx->max_block_size); + return 0; + } + + memcpy(ctx->window, uncompressed_data, uncompressed_size); + ctx->window_size = uncompressed_size; + + lzms_preprocess_data(ctx->window, ctx->window_size, + ctx->last_target_usages); + + return 0; +} + +static void +lzms_free_compressor(void *_ctx) +{ + struct lzms_compressor *ctx = _ctx; + + if (ctx) { + FREE(ctx->window); + FREE(ctx->last_target_usages); + FREE(ctx); + } +} + +static int +lzms_create_compressor(size_t max_block_size, + const struct wimlib_compressor_params_header *params, + void **ctx_ret) +{ + struct lzms_compressor *ctx; + + if (max_block_size == 0 || max_block_size > 1U << 26) { + LZMS_DEBUG("Invalid max_block_size (%u)", max_block_size); + return WIMLIB_ERR_INVALID_PARAM; + } + + ctx = CALLOC(1, sizeof(struct lzms_compressor)); + if (ctx == NULL) + goto oom; + + ctx->window = MALLOC(max_block_size); + if (ctx->window == NULL) + goto oom; + ctx->max_block_size = max_block_size; + + ctx->last_target_usages = MALLOC(65536 * sizeof(ctx->last_target_usages[0])); + if (ctx->last_target_usages == NULL) + goto oom; + + *ctx_ret = ctx; + return 0; + +oom: + lzms_free_compressor(ctx); + return WIMLIB_ERR_NOMEM; +} + +const struct compressor_ops lzms_compressor_ops = { + .create_compressor = lzms_create_compressor, + .compress = lzms_compress, + .free_compressor = lzms_free_compressor, +}; diff --git a/src/lzms-decompress.c b/src/lzms-decompress.c index 186e88d0..40d35bf6 100644 --- a/src/lzms-decompress.c +++ b/src/lzms-decompress.c @@ -193,8 +193,9 @@ #endif #include "wimlib.h" -#include "wimlib/compress.h" -#include "wimlib/decompress.h" +#include "wimlib/compress_common.h" +#include "wimlib/decompressor_ops.h" +#include "wimlib/decompress_common.h" #include "wimlib/error.h" #include "wimlib/lzms.h" #include "wimlib/util.h" @@ -381,6 +382,9 @@ struct lzms_decompressor { u32 upcoming_lz_offset; u32 upcoming_delta_power; u32 upcoming_delta_offset; + + /* Used for postprocessing */ + s32 last_target_usages[65536]; }; /* A table that maps position slots to their base values. These are constants @@ -1093,18 +1097,16 @@ lzms_init_decompressor(struct lzms_decompressor *ctx, /* Decode the series of literals and matches from the LZMS-compressed data. * Returns 0 on success; nonzero if the compressed data is invalid. */ static int -lzms_decode_items(const u8 *cdata, size_t clen, u8 *ubuf, size_t ulen) +lzms_decode_items(const u8 *cdata, size_t clen, u8 *ubuf, size_t ulen, + struct lzms_decompressor *ctx) { - /* XXX: The context could be allocated on the heap. */ - struct lzms_decompressor ctx; - /* Initialize the LZMS decompressor. */ - lzms_init_decompressor(&ctx, cdata, clen, ubuf, ulen); + lzms_init_decompressor(ctx, cdata, clen, ubuf, ulen); /* Decode the sequence of items. */ - while (ctx.out_next != ctx.out_end) { - LZMS_DEBUG("Position %u", ctx.out_next - ctx.out_begin); - if (lzms_decode_item(&ctx)) + while (ctx->out_next != ctx->out_end) { + LZMS_DEBUG("Position %u", ctx->out_next - ctx->out_begin); + if (lzms_decode_item(ctx)) return -1; } return 0; @@ -1216,17 +1218,15 @@ lzms_process_x86_translation(u8 *ubuf, s32 i, s32 *closest_target_usage_p, * actually needs to be done (or to plug in alternate filters, like in LZMA), * and the corresponding preprocessing seems to be done unconditionally. */ static void -lzms_postprocess_data(u8 *ubuf, s32 ulen) +lzms_postprocess_data(u8 *ubuf, s32 ulen, s32 *last_target_usages) { /* Offset (from beginning of buffer) of the most recent reference to a * seemingly valid target address. */ s32 closest_target_usage = -LZMS_X86_MAX_TRANSLATION_OFFSET - 1; - /* Offset (from beginning of buffer) of the most recently used target - * address beginning with two bytes equal to the array index. - * - * XXX: This array could be allocated on the heap. */ - s32 last_target_usages[65536]; + /* Initialize the last_target_usages array. Each entry will contain the + * offset (from beginning of buffer) of the most recently used target + * address beginning with two bytes equal to the array index. */ for (s32 i = 0; i < 65536; i++) last_target_usages[i] = -LZMS_X86_MAX_GOOD_TARGET_OFFSET - 1; @@ -1238,48 +1238,81 @@ lzms_postprocess_data(u8 *ubuf, s32 ulen) last_target_usages); } -/* API function documented in wimlib.h */ -WIMLIBAPI int -wimlib_lzms_decompress(const void *cdata, unsigned clen, - void *ubuf, unsigned ulen) +static int +lzms_decompress(const void *compressed_data, size_t compressed_size, + void *uncompressed_data, size_t uncompressed_size, void *_ctx) { + struct lzms_decompressor *ctx = _ctx; + /* The range decoder requires that a minimum of 4 bytes of compressed * data be initially available. */ - if (clen < 4) { - LZMS_DEBUG("Compressed length too small (got %u, expected >= 4)", - clen); + if (compressed_size < 4) { + LZMS_DEBUG("Compressed size too small (got %zu, expected >= 4)", + compressed_size); return -1; } /* A LZMS-compressed data block should be evenly divisible into 16-bit * integers. */ - if (clen % 2 != 0) { - LZMS_DEBUG("Compressed length not divisible by 2 (got %u)", clen); + if (compressed_size % 2 != 0) { + LZMS_DEBUG("Compressed size not divisible by 2 (got %zu)", + compressed_size); return -1; } /* Handle the trivial case where nothing needs to be decompressed. * (Necessary because a window of size 0 does not have a valid position * slot.) */ - if (ulen == 0) + if (uncompressed_size == 0) return 0; /* The x86 post-processor requires that the uncompressed length fit into * a signed 32-bit integer. Also, the position slot table cannot be * searched for a position of INT32_MAX or greater. */ - if (ulen >= INT32_MAX) { + if (uncompressed_size >= INT32_MAX) { LZMS_DEBUG("Uncompressed length too large " "(got %u, expected < INT32_MAX)", ulen); return -1; } /* Decode the literals and matches. */ - if (lzms_decode_items(cdata, clen, ubuf, ulen)) + if (lzms_decode_items(compressed_data, compressed_size, + uncompressed_data, uncompressed_size, ctx)) return -1; /* Postprocess the data. */ - lzms_postprocess_data(ubuf, ulen); + lzms_postprocess_data(uncompressed_data, uncompressed_size, + ctx->last_target_usages); LZMS_DEBUG("Decompression successful."); return 0; } + +static void +lzms_free_decompressor(void *_ctx) +{ + struct lzms_decompressor *ctx = _ctx; + + FREE(ctx); +} + +static int +lzms_create_decompressor(size_t max_block_size, + const struct wimlib_decompressor_params_header *params, + void **ctx_ret) +{ + struct lzms_decompressor *ctx; + + ctx = MALLOC(sizeof(struct lzms_decompressor)); + if (ctx == NULL) + return WIMLIB_ERR_NOMEM; + + *ctx_ret = ctx; + return 0; +} + +const struct decompressor_ops lzms_decompressor_ops = { + .create_decompressor = lzms_create_decompressor, + .decompress = lzms_decompress, + .free_decompressor = lzms_free_decompressor, +}; diff --git a/src/lzx-common.c b/src/lzx-common.c index 547059e7..5f522835 100644 --- a/src/lzx-common.c +++ b/src/lzx-common.c @@ -66,11 +66,11 @@ const u8 lzx_extra_bits[LZX_MAX_POSITION_SLOTS] = { }; #endif -/* LZX window size can be between 2^15 and 2^21, inclusively. */ +/* LZX window size must be a power of 2 between 2^15 and 2^21, inclusively. */ bool -lzx_window_size_valid(u32 window_size) +lzx_window_size_valid(size_t window_size) { - if (window_size == 0) + if (window_size == 0 || (u32)window_size != window_size) return false; u32 order = bsr32(window_size); if (window_size != 1U << order) diff --git a/src/lzx-compress.c b/src/lzx-compress.c index c72fd460..e4676dca 100644 --- a/src/lzx-compress.c +++ b/src/lzx-compress.c @@ -189,7 +189,8 @@ #endif #include "wimlib.h" -#include "wimlib/compress.h" +#include "wimlib/compressor_ops.h" +#include "wimlib/compress_common.h" #include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/lzx.h" @@ -199,7 +200,7 @@ #include #ifdef ENABLE_LZX_DEBUG -# include "wimlib/decompress.h" +# include "wimlib/decompress_common.h" #endif #include "divsufsort/divsufsort.h" @@ -386,7 +387,7 @@ struct salink { struct lzx_compressor { /* The parameters that were used to create the compressor. */ - struct wimlib_lzx_params params; + struct wimlib_lzx_compressor_params params; /* The buffer of data to be compressed. * @@ -2211,34 +2212,32 @@ do_call_insn_preprocessing(u8 data[], int size) } } -/* API function documented in wimlib.h */ -WIMLIBAPI unsigned -wimlib_lzx_compress2(const void * const restrict uncompressed_data, - unsigned const uncompressed_len, - void * const restrict compressed_data, - struct wimlib_lzx_context * const restrict lzx_ctx) +static size_t +lzx_compress(const void *uncompressed_data, size_t uncompressed_size, + void *compressed_data, size_t compressed_size_avail, void *_ctx) { - struct lzx_compressor *ctx = (struct lzx_compressor*)lzx_ctx; + struct lzx_compressor *ctx = _ctx; struct output_bitstream ostream; - input_idx_t compressed_len; + size_t compressed_size; - if (uncompressed_len < 100) { + if (uncompressed_size < 100) { LZX_DEBUG("Too small to bother compressing."); return 0; } - if (uncompressed_len > ctx->max_window_size) { - LZX_DEBUG("Can't compress %u bytes using window of %u bytes!", - uncompressed_len, ctx->max_window_size); + if (uncompressed_size > ctx->max_window_size) { + LZX_DEBUG("Can't compress %zu bytes using window of %u bytes!", + uncompressed_size, ctx->max_window_size); return 0; } - LZX_DEBUG("Attempting to compress %u bytes...", uncompressed_len); + LZX_DEBUG("Attempting to compress %zu bytes...", + uncompressed_size); /* The input data must be preprocessed. To avoid changing the original * input, copy it to a temporary buffer. */ - memcpy(ctx->window, uncompressed_data, uncompressed_len); - ctx->window_size = uncompressed_len; + memcpy(ctx->window, uncompressed_data, uncompressed_size); + ctx->window_size = uncompressed_size; /* This line is unnecessary; it just avoids inconsequential accesses of * uninitialized memory that would show up in memory-checking tools such @@ -2262,18 +2261,19 @@ wimlib_lzx_compress2(const void * const restrict uncompressed_data, LZX_DEBUG("Writing compressed blocks..."); /* Generate the compressed data. */ - init_output_bitstream(&ostream, compressed_data, ctx->window_size - 1); + init_output_bitstream(&ostream, compressed_data, compressed_size_avail); lzx_write_all_blocks(ctx, &ostream); LZX_DEBUG("Flushing bitstream..."); - compressed_len = flush_output_bitstream(&ostream); - if (compressed_len == ~(input_idx_t)0) { - LZX_DEBUG("Data did not compress to less than original length!"); + compressed_size = flush_output_bitstream(&ostream); + if (compressed_size == ~(input_idx_t)0) { + LZX_DEBUG("Data did not compress to %zu bytes or less!", + compressed_size_avail); return 0; } - LZX_DEBUG("Done: compressed %u => %u bytes.", - uncompressed_len, compressed_len); + LZX_DEBUG("Done: compressed %zu => %zu bytes.", + uncompressed_size, compressed_size); /* Verify that we really get the same thing back when decompressing. * Although this could be disabled by default in all cases, it only @@ -2285,44 +2285,46 @@ wimlib_lzx_compress2(const void * const restrict uncompressed_data, #endif ) { - /* The decompression buffer can be any temporary space that's no - * longer needed. */ - u8 *buf = (u8*)(ctx->SA ? ctx->SA : ctx->prev_tab); + struct wimlib_decompressor *decompressor; - if (wimlib_lzx_decompress2(compressed_data, compressed_len, - buf, uncompressed_len, ctx->max_window_size)) + if (0 == wimlib_create_decompressor(WIMLIB_COMPRESSION_TYPE_LZX, + ctx->max_window_size, + NULL, + &decompressor)) { - ERROR("Failed to decompress data we " - "compressed using LZX algorithm"); - wimlib_assert(0); - return 0; - } - - if (memcmp(uncompressed_data, buf, uncompressed_len)) { - ERROR("Data we compressed using LZX algorithm " - "didn't decompress to original"); - wimlib_assert(0); - return 0; + int ret; + ret = wimlib_decompress(compressed_data, + compressed_size, + ctx->window, + uncompressed_size, + decompressor); + wimlib_free_decompressor(decompressor); + + if (ret) { + ERROR("Failed to decompress data we " + "compressed using LZX algorithm"); + wimlib_assert(0); + return 0; + } + if (memcmp(uncompressed_data, ctx->window, uncompressed_size)) { + ERROR("Data we compressed using LZX algorithm " + "didn't decompress to original"); + wimlib_assert(0); + return 0; + } + } else { + WARNING("Failed to create decompressor for " + "data verification!"); } } - return compressed_len; -} - -static bool -lzx_params_compatible(const struct wimlib_lzx_params *oldparams, - const struct wimlib_lzx_params *newparams) -{ - return 0 == memcmp(oldparams, newparams, sizeof(struct wimlib_lzx_params)); + return compressed_size; } -static struct wimlib_lzx_params lzx_user_default_params; -static struct wimlib_lzx_params *lzx_user_default_params_ptr; - static bool -lzx_params_valid(const struct wimlib_lzx_params *params) +lzx_params_valid(const struct wimlib_lzx_compressor_params *params) { /* Validate parameters. */ - if (params->size_of_this != sizeof(struct wimlib_lzx_params)) { + if (params->hdr.size != sizeof(struct wimlib_lzx_compressor_params)) { LZX_DEBUG("Invalid parameter structure size!"); return false; } @@ -2365,37 +2367,42 @@ lzx_params_valid(const struct wimlib_lzx_params *params) return true; } -/* API function documented in wimlib.h */ -WIMLIBAPI int -wimlib_lzx_set_default_params(const struct wimlib_lzx_params * params) +static void +lzx_free_compressor(void *_ctx) { - if (params) { - if (!lzx_params_valid(params)) - return WIMLIB_ERR_INVALID_PARAM; - lzx_user_default_params = *params; - lzx_user_default_params_ptr = &lzx_user_default_params; - } else { - lzx_user_default_params_ptr = NULL; + struct lzx_compressor *ctx = _ctx; + + if (ctx) { + FREE(ctx->chosen_matches); + FREE(ctx->cached_matches); + FREE(ctx->optimum); + FREE(ctx->salink); + FREE(ctx->SA); + FREE(ctx->block_specs); + FREE(ctx->prev_tab); + FREE(ctx->window); + FREE(ctx); } - return 0; } -/* API function documented in wimlib.h */ -WIMLIBAPI int -wimlib_lzx_alloc_context(u32 window_size, - const struct wimlib_lzx_params *params, - struct wimlib_lzx_context **ctx_pp) +static int +lzx_create_compressor(size_t window_size, + const struct wimlib_compressor_params_header *_params, + void **ctx_ret) { + const struct wimlib_lzx_compressor_params *params = + (const struct wimlib_lzx_compressor_params*)_params; + struct lzx_compressor *ctx; LZX_DEBUG("Allocating LZX context..."); if (!lzx_window_size_valid(window_size)) return WIMLIB_ERR_INVALID_PARAM; - struct lzx_compressor *ctx; - - static const struct wimlib_lzx_params fast_default = { - .size_of_this = sizeof(struct wimlib_lzx_params), + static const struct wimlib_lzx_compressor_params fast_default = { + .hdr = { + .size = sizeof(struct wimlib_lzx_compressor_params), + }, .algorithm = WIMLIB_LZX_ALGORITHM_FAST, .use_defaults = 0, .alg_params = { @@ -2403,8 +2410,10 @@ wimlib_lzx_alloc_context(u32 window_size, }, }, }; - static const struct wimlib_lzx_params slow_default = { - .size_of_this = sizeof(struct wimlib_lzx_params), + static const struct wimlib_lzx_compressor_params slow_default = { + .hdr = { + .size = sizeof(struct wimlib_lzx_compressor_params), + }, .algorithm = WIMLIB_LZX_ALGORITHM_SLOW, .use_defaults = 0, .alg_params = { @@ -2426,10 +2435,7 @@ wimlib_lzx_alloc_context(u32 window_size, return WIMLIB_ERR_INVALID_PARAM; } else { LZX_DEBUG("Using default algorithm and parameters."); - if (lzx_user_default_params_ptr) - params = lzx_user_default_params_ptr; - else - params = &slow_default; + params = &slow_default; } if (params->use_defaults) { @@ -2439,58 +2445,46 @@ wimlib_lzx_alloc_context(u32 window_size, params = &fast_default; } - if (ctx_pp) { - ctx = *(struct lzx_compressor**)ctx_pp; - - if (ctx && - lzx_params_compatible(&ctx->params, params) && - ctx->max_window_size == window_size) - return 0; - } else { - LZX_DEBUG("Check parameters only."); - return 0; - } - LZX_DEBUG("Allocating memory."); ctx = CALLOC(1, sizeof(struct lzx_compressor)); if (ctx == NULL) - goto err; + goto oom; ctx->num_main_syms = lzx_get_num_main_syms(window_size); ctx->max_window_size = window_size; ctx->window = MALLOC(window_size + 12); if (ctx->window == NULL) - goto err; + goto oom; if (params->algorithm == WIMLIB_LZX_ALGORITHM_FAST) { ctx->prev_tab = MALLOC(window_size * sizeof(ctx->prev_tab[0])); if (ctx->prev_tab == NULL) - goto err; + goto oom; } size_t block_specs_length = DIV_ROUND_UP(window_size, LZX_DIV_BLOCK_SIZE); ctx->block_specs = MALLOC(block_specs_length * sizeof(ctx->block_specs[0])); if (ctx->block_specs == NULL) - goto err; + goto oom; if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) { ctx->SA = MALLOC(3U * window_size * sizeof(ctx->SA[0])); if (ctx->SA == NULL) - goto err; + goto oom; ctx->ISA = ctx->SA + window_size; ctx->LCP = ctx->ISA + window_size; ctx->salink = MALLOC(window_size * sizeof(ctx->salink[0])); if (ctx->salink == NULL) - goto err; + goto oom; } if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) { ctx->optimum = MALLOC((LZX_OPTIM_ARRAY_SIZE + LZX_MAX_MATCH_LEN) * sizeof(ctx->optimum[0])); if (ctx->optimum == NULL) - goto err; + goto oom; } if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) { @@ -2503,71 +2497,28 @@ wimlib_lzx_alloc_context(u32 window_size, ctx->cached_matches = MALLOC(window_size * (cache_per_pos + 1) * sizeof(ctx->cached_matches[0])); if (ctx->cached_matches == NULL) - goto err; + goto oom; } ctx->chosen_matches = MALLOC(window_size * sizeof(ctx->chosen_matches[0])); if (ctx->chosen_matches == NULL) - goto err; + goto oom; - memcpy(&ctx->params, params, sizeof(struct wimlib_lzx_params)); + memcpy(&ctx->params, params, sizeof(struct wimlib_lzx_compressor_params)); memset(&ctx->zero_codes, 0, sizeof(ctx->zero_codes)); LZX_DEBUG("Successfully allocated new LZX context."); - wimlib_lzx_free_context(*ctx_pp); - *ctx_pp = (struct wimlib_lzx_context*)ctx; + *ctx_ret = ctx; return 0; -err: - wimlib_lzx_free_context((struct wimlib_lzx_context*)ctx); - LZX_DEBUG("Ran out of memory."); +oom: + lzx_free_compressor(ctx); return WIMLIB_ERR_NOMEM; } -/* API function documented in wimlib.h */ -WIMLIBAPI void -wimlib_lzx_free_context(struct wimlib_lzx_context *_ctx) -{ - struct lzx_compressor *ctx = (struct lzx_compressor*)_ctx; - - if (ctx) { - FREE(ctx->chosen_matches); - FREE(ctx->cached_matches); - FREE(ctx->optimum); - FREE(ctx->salink); - FREE(ctx->SA); - FREE(ctx->block_specs); - FREE(ctx->prev_tab); - FREE(ctx->window); - FREE(ctx); - } -} - -/* API function documented in wimlib.h */ -WIMLIBAPI unsigned -wimlib_lzx_compress(const void * const restrict uncompressed_data, - unsigned const uncompressed_len, - void * const restrict compressed_data) -{ - int ret; - struct wimlib_lzx_context *ctx = NULL; - unsigned compressed_len; - - ret = wimlib_lzx_alloc_context(32768, NULL, &ctx); - if (ret) { - wimlib_assert(ret != WIMLIB_ERR_INVALID_PARAM); - WARNING("Couldn't allocate LZX compression context: %"TS"", - wimlib_get_error_string(ret)); - return 0; - } - - compressed_len = wimlib_lzx_compress2(uncompressed_data, - uncompressed_len, - compressed_data, - ctx); - - wimlib_lzx_free_context(ctx); - - return compressed_len; -} +const struct compressor_ops lzx_compressor_ops = { + .create_compressor = lzx_create_compressor, + .compress = lzx_compress, + .free_compressor = lzx_free_compressor, +}; diff --git a/src/lzx-decompress.c b/src/lzx-decompress.c index b7872520..2405b949 100644 --- a/src/lzx-decompress.c +++ b/src/lzx-decompress.c @@ -108,7 +108,8 @@ #endif #include "wimlib.h" -#include "wimlib/decompress.h" +#include "wimlib/decompressor_ops.h" +#include "wimlib/decompress_common.h" #include "wimlib/lzx.h" #include "wimlib/util.h" @@ -135,6 +136,11 @@ struct lzx_tables { u8 alignedtree_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS]; } _aligned_attribute(DECODE_TABLE_ALIGNMENT); +struct lzx_decompressor { + u32 max_window_size; + unsigned num_main_syms; + struct lzx_tables tables; +}; /* * Reads a Huffman-encoded symbol using the pre-tree. @@ -705,7 +711,7 @@ lzx_decode_match(unsigned main_element, int block_type, } static void -undo_call_insn_translation(u32 *call_insn_target, int input_pos, +undo_call_insn_translation(u32 *call_insn_target, s32 input_pos, s32 file_size) { s32 abs_offset; @@ -747,9 +753,9 @@ undo_call_insn_translation(u32 *call_insn_target, int input_pos, * as it is used in calculating the translated jump targets. But in WIM files, * this file size is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).*/ static void -undo_call_insn_preprocessing(u8 uncompressed_data[], int uncompressed_data_len) +undo_call_insn_preprocessing(u8 *uncompressed_data, s32 uncompressed_size) { - for (int i = 0; i < uncompressed_data_len - 10; i++) { + for (s32 i = 0; i < uncompressed_size - 10; i++) { if (uncompressed_data[i] == 0xe8) { undo_call_insn_translation((u32*)&uncompressed_data[i + 1], i, @@ -818,47 +824,38 @@ lzx_decompress_block(int block_type, unsigned block_size, return 0; } -/* API function documented in wimlib.h */ -WIMLIBAPI int -wimlib_lzx_decompress2(const void *compressed_data, unsigned compressed_len, - void *uncompressed_data, unsigned uncompressed_len, - u32 max_window_size) +static int +lzx_decompress(const void *compressed_data, size_t compressed_size, + void *uncompressed_data, size_t uncompressed_size, + void *_ctx) { - struct lzx_tables tables; + struct lzx_decompressor *ctx = _ctx; struct input_bitstream istream; struct lzx_lru_queue queue; unsigned window_pos; unsigned block_size; unsigned block_type; - unsigned num_main_syms; int ret; bool e8_preprocessing_done; - LZX_DEBUG("compressed_data = %p, compressed_len = %u, " - "uncompressed_data = %p, uncompressed_len = %u, " + LZX_DEBUG("compressed_data = %p, compressed_size = %zu, " + "uncompressed_data = %p, uncompressed_size = %zu, " "max_window_size=%u).", - compressed_data, compressed_len, - uncompressed_data, uncompressed_len, max_window_size); + compressed_data, compressed_size, + uncompressed_data, uncompressed_size, + ctx->max_window_size); - if (!lzx_window_size_valid(max_window_size)) { - LZX_DEBUG("Window size of %u is invalid!", - max_window_size); - return -1; - } - - num_main_syms = lzx_get_num_main_syms(max_window_size); - - if (uncompressed_len > max_window_size) { - LZX_DEBUG("Uncompressed chunk size of %u exceeds " + if (uncompressed_size > ctx->max_window_size) { + LZX_DEBUG("Uncompressed size of %zu exceeds " "window size of %u!", - uncompressed_len, max_window_size); + uncompressed_size, ctx->max_window_size); return -1; } - memset(tables.maintree_lens, 0, sizeof(tables.maintree_lens)); - memset(tables.lentree_lens, 0, sizeof(tables.lentree_lens)); + memset(ctx->tables.maintree_lens, 0, sizeof(ctx->tables.maintree_lens)); + memset(ctx->tables.lentree_lens, 0, sizeof(ctx->tables.lentree_lens)); lzx_lru_queue_init(&queue); - init_input_bitstream(&istream, compressed_data, compressed_len); + init_input_bitstream(&istream, compressed_data, compressed_size); e8_preprocessing_done = false; /* Set to true if there may be 0xe8 bytes in the uncompressed data. */ @@ -869,23 +866,23 @@ wimlib_lzx_decompress2(const void *compressed_data, unsigned compressed_len, * blocks. */ for (window_pos = 0; - window_pos < uncompressed_len; + window_pos < uncompressed_size; window_pos += block_size) { LZX_DEBUG("Reading block header."); - ret = lzx_read_block_header(&istream, num_main_syms, - max_window_size, &block_size, - &block_type, &tables, &queue); + ret = lzx_read_block_header(&istream, ctx->num_main_syms, + ctx->max_window_size, &block_size, + &block_type, &ctx->tables, &queue); if (ret) return ret; LZX_DEBUG("block_size = %u, window_pos = %u", block_size, window_pos); - if (block_size > uncompressed_len - window_pos) { + if (block_size > uncompressed_size - window_pos) { LZX_DEBUG("Expected a block size of at " - "most %u bytes (found %u bytes)", - uncompressed_len - window_pos, block_size); + "most %zu bytes (found %u bytes)", + uncompressed_size - window_pos, block_size); return -1; } @@ -898,16 +895,16 @@ wimlib_lzx_decompress2(const void *compressed_data, unsigned compressed_len, LZX_DEBUG("LZX_BLOCKTYPE_ALIGNED"); ret = lzx_decompress_block(block_type, block_size, - num_main_syms, + ctx->num_main_syms, uncompressed_data, window_pos, - &tables, + &ctx->tables, &queue, &istream); if (ret) return ret; - if (tables.maintree_lens[0xe8] != 0) + if (ctx->tables.maintree_lens[0xe8] != 0) e8_preprocessing_done = true; break; case LZX_BLOCKTYPE_UNCOMPRESSED: @@ -934,16 +931,41 @@ wimlib_lzx_decompress2(const void *compressed_data, unsigned compressed_len, } } if (e8_preprocessing_done) - undo_call_insn_preprocessing(uncompressed_data, uncompressed_len); + undo_call_insn_preprocessing(uncompressed_data, uncompressed_size); return 0; } -/* API function documented in wimlib.h */ -WIMLIBAPI int -wimlib_lzx_decompress(const void *compressed_data, unsigned compressed_len, - void *uncompressed_data, unsigned uncompressed_len) +static void +lzx_free_decompressor(void *_ctx) { - return wimlib_lzx_decompress2(compressed_data, compressed_len, - uncompressed_data, uncompressed_len, - 32768); + struct lzx_decompressor *ctx = _ctx; + + FREE(ctx); } + +static int +lzx_create_decompressor(size_t max_window_size, + const struct wimlib_decompressor_params_header *params, + void **ctx_ret) +{ + struct lzx_decompressor *ctx; + + if (!lzx_window_size_valid(max_window_size)) + return WIMLIB_ERR_INVALID_PARAM; + + ctx = MALLOC(sizeof(struct lzx_decompressor)); + if (ctx == NULL) + return WIMLIB_ERR_NOMEM; + + ctx->max_window_size = max_window_size; + ctx->num_main_syms = lzx_get_num_main_syms(max_window_size); + + *ctx_ret = ctx; + return 0; +} + +const struct decompressor_ops lzx_decompressor_ops = { + .create_decompressor = lzx_create_decompressor, + .decompress = lzx_decompress, + .free_decompressor = lzx_free_decompressor, +}; diff --git a/src/metadata_resource.c b/src/metadata_resource.c index aebc5468..2d780e96 100644 --- a/src/metadata_resource.c +++ b/src/metadata_resource.c @@ -297,8 +297,7 @@ write_metadata_resource(WIMStruct *wim, int image, int write_resource_flags) wim->out_chunk_size, &imd->metadata_lte->out_reshdr, imd->metadata_lte->hash, - write_resource_flags, - &wim->lzx_context); + write_resource_flags); /* Original checksum was overridden; set a flag so it isn't used. */ imd->metadata_lte->dont_check_metadata_hash = 1; diff --git a/src/resource.c b/src/resource.c index ff46114d..1a46e751 100644 --- a/src/resource.c +++ b/src/resource.c @@ -90,29 +90,6 @@ */ -/* Decompress the specified chunk that uses the specified compression type - * @ctype, part of a WIM with default chunk size @wim_chunk_size. For LZX the - * separate @wim_chunk_size is needed because it determines the window size used - * for LZX compression. */ -static int -decompress(const void *cchunk, unsigned clen, void *uchunk, unsigned ulen, - int ctype, u32 wim_chunk_size) -{ - switch (ctype) { - case WIMLIB_COMPRESSION_TYPE_LZX: - return wimlib_lzx_decompress2(cchunk, clen, - uchunk, ulen, wim_chunk_size); - case WIMLIB_COMPRESSION_TYPE_XPRESS: - return wimlib_xpress_decompress(cchunk, clen, - uchunk, ulen); - case WIMLIB_COMPRESSION_TYPE_LZMS: - return wimlib_lzms_decompress(cchunk, clen, uchunk, ulen); - default: - ERROR("Invalid compression format (%d)", ctype); - return -1; - } -} - struct data_range { u64 offset; u64 size; @@ -164,6 +141,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, bool chunk_offsets_malloced = false; bool ubuf_malloced = false; bool cbuf_malloced = false; + struct wimlib_decompressor *decompressor; /* Sanity checks */ wimlib_assert(rspec != NULL); @@ -232,6 +210,21 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, goto out_free_memory; } + /* Get valid decompressor. */ + if (ctype == rspec->wim->decompressor_ctype && + chunk_size == rspec->wim->decompressor_max_block_size) + { + /* Cached decompressor. */ + decompressor = rspec->wim->decompressor; + rspec->wim->decompressor_ctype = WIMLIB_COMPRESSION_TYPE_NONE; + rspec->wim->decompressor = NULL; + } else { + ret = wimlib_create_decompressor(ctype, chunk_size, NULL, + &decompressor); + if (ret) + goto out_free_memory; + } + const u32 chunk_order = bsr32(chunk_size); /* Calculate the total number of chunks the resource is divided into. */ @@ -450,7 +443,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, ERROR("Invalid chunk size in compressed resource!"); errno = EINVAL; ret = WIMLIB_ERR_DECOMPRESSION; - goto out_free_memory; + goto out_save_decompressor; } if (rspec->is_pipable) cur_read_offset += sizeof(struct pwm_chunk_hdr); @@ -494,17 +487,16 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, DEBUG("Decompressing chunk %"PRIu64" " "(csize=%"PRIu32" usize=%"PRIu32")", i, chunk_csize, chunk_usize); - ret = decompress(cbuf, - chunk_csize, - ubuf, - chunk_usize, - ctype, - chunk_size); + ret = wimlib_decompress(cbuf, + chunk_csize, + ubuf, + chunk_usize, + decompressor); if (ret) { ERROR("Failed to decompress data!"); ret = WIMLIB_ERR_DECOMPRESSION; errno = EINVAL; - goto out_free_memory; + goto out_save_decompressor; } } cur_read_offset += chunk_csize; @@ -525,7 +517,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, ret = (*cb)(&ubuf[start], size, cb_ctx); if (ret) - goto out_free_memory; + goto out_save_decompressor; cur_range_pos += size; if (cur_range_pos == cur_range_end) { @@ -556,6 +548,11 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, goto read_error; } ret = 0; +out_save_decompressor: + wimlib_free_decompressor(rspec->wim->decompressor); + rspec->wim->decompressor = decompressor; + rspec->wim->decompressor_ctype = ctype; + rspec->wim->decompressor_max_block_size = chunk_size; out_free_memory: errno_save = errno; if (chunk_offsets_malloced) diff --git a/src/wim.c b/src/wim.c index 97e156dd..d833ec75 100644 --- a/src/wim.c +++ b/src/wim.c @@ -38,6 +38,8 @@ #include "wimlib/security.h" #include "wimlib/wim.h" #include "wimlib/xml.h" +#include "wimlib/compressor_ops.h" +#include "wimlib/decompressor_ops.h" #ifdef __WIN32__ # include "wimlib/win32.h" /* for realpath() replacement */ @@ -937,10 +939,10 @@ wimlib_free(WIMStruct *wim) if (filedes_valid(&wim->out_fd)) filedes_close(&wim->out_fd); - wimlib_lzx_free_context(wim->lzx_context); - free_lookup_table(wim->lookup_table); + wimlib_free_decompressor(wim->decompressor); + FREE(wim->filename); free_wim_info(wim->wim_info); if (wim->image_metadata) { @@ -1003,4 +1005,6 @@ wimlib_global_cleanup(void) #ifdef __WIN32__ win32_global_cleanup(); #endif + cleanup_decompressor_params(); + cleanup_compressor_params(); } diff --git a/src/write.c b/src/write.c index 7c39971f..e495cb5b 100644 --- a/src/write.c +++ b/src/write.c @@ -34,7 +34,7 @@ # include #endif -#include "wimlib/compress_chunks.h" +#include "wimlib/chunk_compressor.h" #include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/file_io.h" @@ -1199,11 +1199,6 @@ remove_zero_length_streams(struct list_head *stream_list) * no streams are hard-filtered or no streams are unhashed, this parameter * can be NULL. * - * @comp_ctx - * A location in which to allocate the pointer to the default LZX - * compression context. This will only be used if @out_ctype is - * WIMLIB_COMPRESSION_TYPE_LZX. - * * @progress_func * If non-NULL, a progress function that will be called periodically with * WIMLIB_PROGRESS_MSG_WRITE_STREAMS messages. Note that on-the-fly @@ -1268,7 +1263,6 @@ write_stream_list(struct list_head *stream_list, unsigned num_threads, struct wim_lookup_table *lookup_table, struct filter_context *filter_ctx, - struct wimlib_lzx_context **comp_ctx, wimlib_progress_func_t progress_func) { int ret; @@ -1367,15 +1361,8 @@ write_stream_list(struct list_head *stream_list, } if (ctx.compressor == NULL) { - if (out_ctype == WIMLIB_COMPRESSION_TYPE_LZX) { - ret = wimlib_lzx_alloc_context(out_chunk_size, - NULL, - comp_ctx); - if (ret) - goto out_destroy_context; - } ret = new_serial_chunk_compressor(out_ctype, out_chunk_size, - *comp_ctx, &ctx.compressor); + &ctx.compressor); if (ret) goto out_destroy_context; } @@ -1478,8 +1465,7 @@ write_wim_resource(struct wim_lookup_table_entry *lte, struct filedes *out_fd, int out_ctype, u32 out_chunk_size, - int write_resource_flags, - struct wimlib_lzx_context **comp_ctx) + int write_resource_flags) { LIST_HEAD(stream_list); list_add(<e->write_streams_list, &stream_list); @@ -1492,7 +1478,6 @@ write_wim_resource(struct wim_lookup_table_entry *lte, 1, NULL, NULL, - comp_ctx, NULL); } @@ -1503,8 +1488,7 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size, u32 out_chunk_size, struct wim_reshdr *out_reshdr, u8 *hash, - int write_resource_flags, - struct wimlib_lzx_context **comp_ctx) + int write_resource_flags) { int ret; struct wim_lookup_table_entry *lte; @@ -1529,7 +1513,7 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size, } ret = write_wim_resource(lte, out_fd, out_ctype, out_chunk_size, - write_resource_flags, comp_ctx); + write_resource_flags); if (ret) goto out_free_lte; @@ -1936,7 +1920,6 @@ write_wim_streams(WIMStruct *wim, int image, int write_flags, num_threads, wim->lookup_table, filter_ctx, - &wim->lzx_context, progress_func); } @@ -1996,8 +1979,7 @@ write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags, &wim->out_fd, wim->out_compression_type, wim->out_chunk_size, - write_resource_flags, - &wim->lzx_context); + write_resource_flags); } if (ret) return ret; @@ -2117,8 +2099,7 @@ write_wim_lookup_table(WIMStruct *wim, int image, int write_flags, &wim->out_fd, wim->hdr.part_number, out_reshdr, - write_flags_to_resource_flags(write_flags), - &wim->lzx_context); + write_flags_to_resource_flags(write_flags)); } /* @@ -2991,7 +2972,6 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, num_threads, wim->lookup_table, &filter_ctx, - &wim->lzx_context, progress_func); if (ret) goto out_truncate; diff --git a/src/xml.c b/src/xml.c index 8179ebce..542fc379 100644 --- a/src/xml.c +++ b/src/xml.c @@ -1521,8 +1521,7 @@ write_wim_xml_data(WIMStruct *wim, int image, u64 total_bytes, 0, out_reshdr, NULL, - write_resource_flags, - &wim->lzx_context); + write_resource_flags); FREE(xml_data); DEBUG("ret=%d", ret); return ret; diff --git a/src/xpress-compress.c b/src/xpress-compress.c index 005a0a22..2637ce93 100644 --- a/src/xpress-compress.c +++ b/src/xpress-compress.c @@ -31,7 +31,8 @@ #include "wimlib.h" #include "wimlib/assert.h" -#include "wimlib/compress.h" +#include "wimlib/compressor_ops.h" +#include "wimlib/compress_common.h" #include "wimlib/error.h" #include "wimlib/util.h" #include "wimlib/xpress.h" @@ -42,6 +43,21 @@ #include #include +struct xpress_record_ctx { + input_idx_t freqs[XPRESS_NUM_SYMBOLS]; + struct xpress_match *matches; +}; + +struct xpress_compressor { + u8 *window; + u32 max_window_size; + struct xpress_match *matches; + input_idx_t *prev_tab; + u16 codewords[XPRESS_NUM_SYMBOLS]; + u8 lens[XPRESS_NUM_SYMBOLS]; + struct xpress_record_ctx record_ctx; +}; + /* Intermediate XPRESS match/literal representation. */ struct xpress_match { u16 adjusted_len; /* Match length minus XPRESS_MIN_MATCH_LEN */ @@ -98,11 +114,6 @@ xpress_write_matches_and_literals(struct output_bitstream *ostream, bitstream_put_bits(ostream, codewords[XPRESS_END_OF_DATA], lens[XPRESS_END_OF_DATA]); } -struct xpress_record_ctx { - input_idx_t freqs[XPRESS_NUM_SYMBOLS]; - struct xpress_match *matches; -}; - static void xpress_record_literal(u8 lit, void *_ctx) { @@ -144,26 +155,16 @@ static const struct lz_params xpress_lz_params = { .too_far = 4096, }; -/* API function documented in wimlib.h */ -WIMLIBAPI unsigned -wimlib_xpress_compress(const void * restrict uncompressed_data, - unsigned uncompressed_len, - void * restrict compressed_data) +static size_t +xpress_compress(const void *uncompressed_data, size_t uncompressed_size, + void *compressed_data, size_t compressed_size_avail, void *_c) { + struct xpress_compressor *c = _c; u8 *cptr = compressed_data; struct output_bitstream ostream; - - struct xpress_record_ctx record_ctx; - - struct xpress_match *matches; - input_idx_t *prev_tab; - u8 *udata; - - u16 codewords[XPRESS_NUM_SYMBOLS]; - u8 lens[XPRESS_NUM_SYMBOLS]; input_idx_t num_matches; - input_idx_t compressed_len; input_idx_t i; + size_t compressed_size; /* XPRESS requires 256 bytes of overhead for the Huffman code, so it's * impossible to compress 256 bytes or less of data to less than the @@ -174,94 +175,146 @@ wimlib_xpress_compress(const void * restrict uncompressed_data, * * +4 to take into account that init_output_bitstream() requires at * least 4 bytes of data. */ - if (uncompressed_len < XPRESS_NUM_SYMBOLS / 2 + 1 + 4) + if (compressed_size_avail < XPRESS_NUM_SYMBOLS / 2 + 1 + 4) return 0; - if (uncompressed_len <= STACK_MAX) { - matches = alloca(uncompressed_len * sizeof(matches[0])); - udata = alloca(uncompressed_len + 8); - prev_tab = alloca(uncompressed_len * sizeof(prev_tab[0])); - } else { - matches = MALLOC(uncompressed_len * sizeof(matches[0])); - udata = MALLOC(uncompressed_len + 8); - prev_tab = MALLOC(uncompressed_len * sizeof(prev_tab[0])); - if (matches == NULL || udata == NULL || prev_tab == NULL) { - WARNING("Failed to allocate memory for compression..."); - compressed_len = 0; - goto out_free; - } - } - /* Copy the data to a temporary buffer, but only to avoid * inconsequential accesses of uninitialized memory in * lz_analyze_block(). */ - memcpy(udata, uncompressed_data, uncompressed_len); - memset(udata + uncompressed_len, 0, 8); + memcpy(c->window, uncompressed_data, uncompressed_size); + memset(c->window + uncompressed_size, 0, 8); /* Determine match/literal sequence to divide the data into. */ - ZERO_ARRAY(record_ctx.freqs); - record_ctx.matches = matches; - lz_analyze_block(udata, - uncompressed_len, + memset(c->record_ctx.freqs, 0, sizeof(c->record_ctx.freqs)); + c->record_ctx.matches = c->matches; + lz_analyze_block(c->window, + uncompressed_size, xpress_record_match, xpress_record_literal, - &record_ctx, + &c->record_ctx, &xpress_lz_params, - prev_tab); + c->prev_tab); - num_matches = (record_ctx.matches - matches); + num_matches = (c->record_ctx.matches - c->matches); /* Account for end of data symbol. */ - record_ctx.freqs[XPRESS_END_OF_DATA]++; + c->record_ctx.freqs[XPRESS_END_OF_DATA]++; /* Build the Huffman code. */ make_canonical_huffman_code(XPRESS_NUM_SYMBOLS, XPRESS_MAX_CODEWORD_LEN, - record_ctx.freqs, lens, codewords); + c->record_ctx.freqs, c->lens, c->codewords); /* Output the Huffman code as a series of 512 4-bit lengths. */ for (i = 0; i < XPRESS_NUM_SYMBOLS; i += 2) - *cptr++ = (lens[i] & 0xf) | (lens[i + 1] << 4); + *cptr++ = (c->lens[i] & 0xf) | (c->lens[i + 1] << 4); /* Output the encoded matches/literals. */ init_output_bitstream(&ostream, cptr, - uncompressed_len - XPRESS_NUM_SYMBOLS / 2 - 1); - xpress_write_matches_and_literals(&ostream, matches, - num_matches, codewords, lens); + compressed_size_avail - XPRESS_NUM_SYMBOLS / 2 - 1); + + xpress_write_matches_and_literals(&ostream, c->matches, + num_matches, c->codewords, c->lens); /* Flush any pending data and get the length of the compressed data. */ - compressed_len = flush_output_bitstream(&ostream); - if (compressed_len == ~(input_idx_t)0) { - compressed_len = 0; - goto out_free; - } - compressed_len += XPRESS_NUM_SYMBOLS / 2; + compressed_size = flush_output_bitstream(&ostream); + if (compressed_size == ~(input_idx_t)0) + return 0; + + compressed_size += XPRESS_NUM_SYMBOLS / 2; #if defined(ENABLE_XPRESS_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION) /* Verify that we really get the same thing back when decompressing. */ - if (wimlib_xpress_decompress(compressed_data, compressed_len, - udata, uncompressed_len)) { - ERROR("Failed to decompress data we " - "compressed using XPRESS algorithm"); - wimlib_assert(0); - compressed_len = 0; - goto out_free; - } - - if (memcmp(uncompressed_data, udata, uncompressed_len)) { - ERROR("Data we compressed using XPRESS algorithm " - "didn't decompress to original"); - wimlib_assert(0); - compressed_len = 0; - goto out_free; + struct wimlib_decompressor *decompressor; + + if (0 == wimlib_create_decompressor(WIMLIB_COMPRESSION_TYPE_XPRESS, + c->max_window_size, + NULL, + &decompressor)) + { + int ret; + ret = wimlib_decompress(compressed_data, + compressed_size, + c->window, + uncompressed_size, + decompressor); + wimlib_free_decompressor(decompressor); + + if (ret) { + ERROR("Failed to decompress data we " + "compressed using XPRESS algorithm"); + wimlib_assert(0); + return 0; + } + if (memcmp(uncompressed_data, c->window, + uncompressed_size)) + { + ERROR("Data we compressed using XPRESS algorithm " + "didn't decompress to original"); + wimlib_assert(0); + return 0; + } + } else { + WARNING("Failed to create decompressor for " + "data verification!"); + } } #endif -out_free: - if (uncompressed_len > STACK_MAX) { - FREE(matches); - FREE(udata); - FREE(prev_tab); + return compressed_size; +} + +static void +xpress_free_compressor(void *_c) +{ + struct xpress_compressor *c = _c; + + if (c) { + FREE(c->window); + FREE(c->matches); + FREE(c->prev_tab); + FREE(c); } - return compressed_len; } + +static int +xpress_create_compressor(size_t max_window_size, + const struct wimlib_compressor_params_header *params, + void **c_ret) +{ + struct xpress_compressor *c; + + if (max_window_size == 0 || max_window_size > (1U << 26)) + return WIMLIB_ERR_INVALID_PARAM; + + c = CALLOC(1, sizeof(struct xpress_compressor)); + if (c == NULL) + goto oom; + + c->window = MALLOC(max_window_size + 8); + if (c->window == NULL) + goto oom; + + c->max_window_size = max_window_size; + + c->matches = MALLOC(max_window_size * sizeof(c->matches[0])); + if (c->matches == NULL) + goto oom; + + c->prev_tab = MALLOC(max_window_size * sizeof(c->prev_tab[0])); + if (c->prev_tab == NULL) + goto oom; + + *c_ret = c; + return 0; + +oom: + xpress_free_compressor(c); + return WIMLIB_ERR_NOMEM; +} + +const struct compressor_ops xpress_compressor_ops = { + .create_compressor = xpress_create_compressor, + .compress = xpress_compress, + .free_compressor = xpress_free_compressor, +}; diff --git a/src/xpress-decompress.c b/src/xpress-decompress.c index f08cccbb..e4f4c6ba 100644 --- a/src/xpress-decompress.c +++ b/src/xpress-decompress.c @@ -70,7 +70,8 @@ #endif #include "wimlib.h" -#include "wimlib/decompress.h" +#include "wimlib/decompressor_ops.h" +#include "wimlib/decompress_common.h" #include "wimlib/xpress.h" /* @@ -195,13 +196,11 @@ xpress_lz_decode(struct input_bitstream * restrict istream, /* API function documented in wimlib.h */ -WIMLIBAPI int -wimlib_xpress_decompress(const void * const restrict _compressed_data, - const unsigned compressed_len, - void * const restrict uncompressed_data, - const unsigned uncompressed_len) +static int +xpress_decompress(const void *compressed_data, size_t compressed_size, + void *uncompressed_data, size_t uncompressed_size, void *_ctx) { - const u8 *compressed_data = _compressed_data; + const u8 *cdata = compressed_data; u8 lens[XPRESS_NUM_SYMBOLS]; u8 *lens_p; u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS] @@ -211,13 +210,13 @@ wimlib_xpress_decompress(const void * const restrict _compressed_data, /* XPRESS uses only one Huffman code. It contains 512 symbols, and the * code lengths of these symbols are given literally as 4-bit integers * in the first 256 bytes of the compressed data. */ - if (compressed_len < XPRESS_NUM_SYMBOLS / 2) + if (compressed_size < XPRESS_NUM_SYMBOLS / 2) return -1; lens_p = lens; for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) { - *lens_p++ = compressed_data[i] & 0xf; - *lens_p++ = compressed_data[i] >> 4; + *lens_p++ = cdata[i] & 0xf; + *lens_p++ = cdata[i] >> 4; } if (make_huffman_decode_table(decode_table, XPRESS_NUM_SYMBOLS, @@ -225,9 +224,13 @@ wimlib_xpress_decompress(const void * const restrict _compressed_data, XPRESS_MAX_CODEWORD_LEN)) return -1; - init_input_bitstream(&istream, compressed_data + XPRESS_NUM_SYMBOLS / 2, - compressed_len - XPRESS_NUM_SYMBOLS / 2); + init_input_bitstream(&istream, cdata + XPRESS_NUM_SYMBOLS / 2, + compressed_size - XPRESS_NUM_SYMBOLS / 2); return xpress_lz_decode(&istream, uncompressed_data, - uncompressed_len, lens, decode_table); + uncompressed_size, lens, decode_table); } + +const struct decompressor_ops xpress_decompressor_ops = { + .decompress = xpress_decompress, +};