From 3d8ef754a66f76c8f7121b65a4e466bce6a75f0f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Aug 2014 22:48:53 -0500 Subject: [PATCH] LZX: Allow max_block_size not a power of 2 This should be treated as an internal of LZX. --- include/wimlib/lzx.h | 5 +++-- src/lzx-common.c | 31 ++++++++++++++++++++----------- src/lzx-compress.c | 39 +++++++++++++++++++++++++++++---------- src/lzx-decompress.c | 28 +++++++++++++--------------- 4 files changed, 65 insertions(+), 38 deletions(-) diff --git a/include/wimlib/lzx.h b/include/wimlib/lzx.h index 67fb3561..97d4a691 100644 --- a/include/wimlib/lzx.h +++ b/include/wimlib/lzx.h @@ -65,8 +65,9 @@ lzx_get_position_slot_raw(u32 formatted_offset) } } -extern bool lzx_window_size_valid(size_t window_size); -extern unsigned lzx_get_num_main_syms(u32 window_size); +extern unsigned lzx_get_window_order(size_t max_block_size); + +extern unsigned lzx_get_num_main_syms(unsigned window_order); /* Least-recently used queue for match offsets. */ struct lzx_lru_queue { diff --git a/src/lzx-common.c b/src/lzx-common.c index b2eeb7d1..566161c3 100644 --- a/src/lzx-common.c +++ b/src/lzx-common.c @@ -67,23 +67,32 @@ const u8 lzx_extra_bits[LZX_MAX_POSITION_SLOTS] = { }; #endif -/* LZX window size must be a power of 2 between 2^15 and 2^21, inclusively. */ -bool -lzx_window_size_valid(size_t window_size) +/* Round the specified compression block size (not LZX block size) up to the + * next valid LZX window size, and return its order (log2). Or, if the block + * size is 0 or greater than the largest valid LZX window size, return 0. */ +unsigned +lzx_get_window_order(size_t max_block_size) { - if (window_size == 0 || (u32)window_size != window_size) - return false; - u32 order = bsr32(window_size); - if (window_size != 1U << order) - return false; - return (order >= LZX_MIN_WINDOW_ORDER && order <= LZX_MAX_WINDOW_ORDER); + unsigned order; + + if (max_block_size == 0 || max_block_size > (1 << LZX_MAX_WINDOW_ORDER)) + return 0; + + order = bsr32(max_block_size); + + if ((1 << order) != max_block_size) + order++; + + return max(order, LZX_MIN_WINDOW_ORDER); } -/* Given a valid LZX window size, return the number of symbols that will exist +/* Given a valid LZX window order, return the number of symbols that will exist * in the main Huffman code. */ unsigned -lzx_get_num_main_syms(u32 window_size) +lzx_get_num_main_syms(unsigned window_order) { + u32 window_size = 1 << window_order; + /* NOTE: the calculation *should* be as follows: * * u32 max_offset = window_size - LZX_MIN_MATCH_LEN; diff --git a/src/lzx-compress.c b/src/lzx-compress.c index 0a4edb0e..96ea2e5d 100644 --- a/src/lzx-compress.c +++ b/src/lzx-compress.c @@ -328,14 +328,24 @@ struct lzx_compressor { /* Allocated size of @cur_window. */ u32 max_window_size; + /* log2 order of the LZX window size for LZ match offset encoding + * purposes. Will be >= LZX_MIN_WINDOW_ORDER and <= + * LZX_MAX_WINDOW_ORDER. + * + * Note: 1 << @window_order is normally equal to @max_window_size, but + * it will be greater than @max_window_size in the event that the + * compressor was created with a non-power-of-2 block size. (See + * lzx_get_window_order().) */ + unsigned window_order; + /* Compression parameters. */ struct lzx_compressor_params params; unsigned (*get_matches_func)(struct lzx_compressor *, const struct lz_match **); void (*skip_bytes_func)(struct lzx_compressor *, unsigned n); - /* Number of symbols in the main alphabet (depends on the - * @max_window_size since it determines the maximum allowed offset). */ + /* Number of symbols in the main alphabet (depends on the @window_order + * since it determines the maximum allowed offset). */ unsigned num_main_syms; /* The current match offset LRU queue. */ @@ -998,7 +1008,7 @@ lzx_write_items(struct lzx_output_bitstream *os, int block_type, static void lzx_write_compressed_block(int block_type, u32 block_size, - u32 max_window_size, + unsigned window_order, unsigned num_main_syms, struct lzx_item * chosen_items, u32 num_chosen_items, @@ -1033,7 +1043,7 @@ lzx_write_compressed_block(int block_type, } else { lzx_write_bits(os, 0, 1); - if (max_window_size >= 65536) + if (window_order >= 16) lzx_write_bits(os, block_size >> 16, 8); lzx_write_bits(os, block_size & 0xFFFF, 16); @@ -1075,7 +1085,7 @@ lzx_write_all_blocks(struct lzx_compressor *c, struct lzx_output_bitstream *os) lzx_write_compressed_block(spec->block_type, spec->block_size, - c->max_window_size, + c->window_order, c->num_main_syms, spec->chosen_items, spec->num_chosen_items, @@ -2254,13 +2264,17 @@ static void lzx_free_compressor(void *_c); static u64 -lzx_get_needed_memory(size_t max_window_size, unsigned int compression_level) +lzx_get_needed_memory(size_t max_block_size, unsigned int compression_level) { struct lzx_compressor_params params; u64 size = 0; + unsigned window_order; + u32 max_window_size; - if (!lzx_window_size_valid(max_window_size)) + window_order = lzx_get_window_order(max_block_size); + if (window_order == 0) return 0; + max_window_size = max_block_size; lzx_build_params(compression_level, max_window_size, ¶ms); @@ -2286,15 +2300,19 @@ lzx_get_needed_memory(size_t max_window_size, unsigned int compression_level) } static int -lzx_create_compressor(size_t max_window_size, unsigned int compression_level, +lzx_create_compressor(size_t max_block_size, unsigned int compression_level, void **c_ret) { struct lzx_compressor *c; struct lzx_compressor_params params; struct lz_mf_params mf_params; + unsigned window_order; + u32 max_window_size; - if (!lzx_window_size_valid(max_window_size)) + window_order = lzx_get_window_order(max_block_size); + if (window_order == 0) return WIMLIB_ERR_INVALID_PARAM; + max_window_size = max_block_size; lzx_build_params(compression_level, max_window_size, ¶ms); lzx_build_mf_params(¶ms, max_window_size, &mf_params); @@ -2306,8 +2324,9 @@ lzx_create_compressor(size_t max_window_size, unsigned int compression_level, goto oom; c->params = params; - c->num_main_syms = lzx_get_num_main_syms(max_window_size); + c->num_main_syms = lzx_get_num_main_syms(window_order); c->max_window_size = max_window_size; + c->window_order = window_order; c->cur_window = ALIGNED_MALLOC(max_window_size, 16); if (!c->cur_window) diff --git a/src/lzx-decompress.c b/src/lzx-decompress.c index 0c95ac03..1e853274 100644 --- a/src/lzx-decompress.c +++ b/src/lzx-decompress.c @@ -94,12 +94,11 @@ struct lzx_tables { /* The main LZX decompressor structure. * * Note: we keep track of most of the decompression state outside this - * structure. This structure only exists so that (1) we can store - * @max_window_size and @num_main_syms for multiple calls to lzx_decompress(); - * and (2) so that we don't have to allocate the large 'struct lzx_tables' on - * the stack. */ + * structure. This structure only exists so that (1) we can store @window_order + * and @num_main_syms for multiple calls to lzx_decompress(); and (2) so that we + * don't have to allocate the large 'struct lzx_tables' on the stack. */ struct lzx_decompressor { - u32 max_window_size; + unsigned window_order; unsigned num_main_syms; struct lzx_tables tables; }; @@ -265,7 +264,7 @@ lzx_read_codeword_lens(struct input_bitstream *istream, u8 lens[], unsigned num_ static int lzx_read_block_header(struct input_bitstream *istream, unsigned num_main_syms, - u32 max_window_size, + unsigned window_order, int *block_type_ret, u32 *block_size_ret, struct lzx_tables *tables, @@ -296,7 +295,7 @@ lzx_read_block_header(struct input_bitstream *istream, block_size <<= 8; block_size |= tmp; - if (max_window_size >= 65536) { + if (window_order >= 16) { tmp = bitstream_read_bits(istream, 8); block_size <<= 8; block_size |= tmp; @@ -593,9 +592,6 @@ lzx_decompress(const void *compressed_data, size_t compressed_size, bool may_have_e8_byte; int ret; - if (uncompressed_size > dec->max_window_size) - return -1; - init_input_bitstream(&istream, compressed_data, compressed_size); /* Initialize the recent offsets queue. */ @@ -619,7 +615,7 @@ lzx_decompress(const void *compressed_data, size_t compressed_size, window_pos += block_size) { ret = lzx_read_block_header(&istream, dec->num_main_syms, - dec->max_window_size, &block_type, + dec->window_order, &block_type, &block_size, &dec->tables, &queue); if (ret) return ret; @@ -683,11 +679,13 @@ lzx_free_decompressor(void *_dec) } static int -lzx_create_decompressor(size_t max_window_size, void **dec_ret) +lzx_create_decompressor(size_t max_block_size, void **dec_ret) { struct lzx_decompressor *dec; + unsigned window_order; - if (!lzx_window_size_valid(max_window_size)) + window_order = lzx_get_window_order(max_block_size); + if (window_order == 0) return WIMLIB_ERR_INVALID_PARAM; /* The aligned allocation is needed to ensure that the lzx_tables are @@ -697,8 +695,8 @@ lzx_create_decompressor(size_t max_window_size, void **dec_ret) if (!dec) return WIMLIB_ERR_NOMEM; - dec->max_window_size = max_window_size; - dec->num_main_syms = lzx_get_num_main_syms(max_window_size); + dec->window_order = window_order; + dec->num_main_syms = lzx_get_num_main_syms(window_order); *dec_ret = dec; return 0; -- 2.43.0