LZX: Allow max_block_size not a power of 2
authorEric Biggers <ebiggers3@gmail.com>
Tue, 19 Aug 2014 03:48:53 +0000 (22:48 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Tue, 19 Aug 2014 04:19:07 +0000 (23:19 -0500)
This should be treated as an internal of LZX.

include/wimlib/lzx.h
src/lzx-common.c
src/lzx-compress.c
src/lzx-decompress.c

index 67fb356..97d4a69 100644 (file)
@@ -65,8 +65,9 @@ lzx_get_position_slot_raw(u32 formatted_offset)
        }
 }
 
-extern bool lzx_window_size_valid(size_t window_size);
-extern unsigned lzx_get_num_main_syms(u32 window_size);
+extern unsigned lzx_get_window_order(size_t max_block_size);
+
+extern unsigned lzx_get_num_main_syms(unsigned window_order);
 
 /* Least-recently used queue for match offsets.  */
 struct lzx_lru_queue {
index b2eeb7d..566161c 100644 (file)
@@ -67,23 +67,32 @@ const u8 lzx_extra_bits[LZX_MAX_POSITION_SLOTS] = {
 };
 #endif
 
-/* LZX window size must be a power of 2 between 2^15 and 2^21, inclusively.  */
-bool
-lzx_window_size_valid(size_t window_size)
+/* Round the specified compression block size (not LZX block size) up to the
+ * next valid LZX window size, and return its order (log2).  Or, if the block
+ * size is 0 or greater than the largest valid LZX window size, return 0.  */
+unsigned
+lzx_get_window_order(size_t max_block_size)
 {
-       if (window_size == 0 || (u32)window_size != window_size)
-               return false;
-       u32 order = bsr32(window_size);
-       if (window_size != 1U << order)
-               return false;
-       return (order >= LZX_MIN_WINDOW_ORDER && order <= LZX_MAX_WINDOW_ORDER);
+       unsigned order;
+
+       if (max_block_size == 0 || max_block_size > (1 << LZX_MAX_WINDOW_ORDER))
+               return 0;
+
+       order = bsr32(max_block_size);
+
+       if ((1 << order) != max_block_size)
+               order++;
+
+       return max(order, LZX_MIN_WINDOW_ORDER);
 }
 
-/* Given a valid LZX window size, return the number of symbols that will exist
+/* Given a valid LZX window order, return the number of symbols that will exist
  * in the main Huffman code.  */
 unsigned
-lzx_get_num_main_syms(u32 window_size)
+lzx_get_num_main_syms(unsigned window_order)
 {
+       u32 window_size = 1 << window_order;
+
        /* NOTE: the calculation *should* be as follows:
         *
         * u32 max_offset = window_size - LZX_MIN_MATCH_LEN;
index 0a4edb0..96ea2e5 100644 (file)
@@ -328,14 +328,24 @@ struct lzx_compressor {
        /* Allocated size of @cur_window.  */
        u32 max_window_size;
 
+       /* log2 order of the LZX window size for LZ match offset encoding
+        * purposes.  Will be >= LZX_MIN_WINDOW_ORDER and <=
+        * LZX_MAX_WINDOW_ORDER.
+        *
+        * Note: 1 << @window_order is normally equal to @max_window_size, but
+        * it will be greater than @max_window_size in the event that the
+        * compressor was created with a non-power-of-2 block size.  (See
+        * lzx_get_window_order().)  */
+       unsigned window_order;
+
        /* Compression parameters.  */
        struct lzx_compressor_params params;
 
        unsigned (*get_matches_func)(struct lzx_compressor *, const struct lz_match **);
        void (*skip_bytes_func)(struct lzx_compressor *, unsigned n);
 
-       /* Number of symbols in the main alphabet (depends on the
-        * @max_window_size since it determines the maximum allowed offset).  */
+       /* Number of symbols in the main alphabet (depends on the @window_order
+        * since it determines the maximum allowed offset).  */
        unsigned num_main_syms;
 
        /* The current match offset LRU queue.  */
@@ -998,7 +1008,7 @@ lzx_write_items(struct lzx_output_bitstream *os, int block_type,
 static void
 lzx_write_compressed_block(int block_type,
                           u32 block_size,
-                          u32 max_window_size,
+                          unsigned window_order,
                           unsigned num_main_syms,
                           struct lzx_item * chosen_items,
                           u32 num_chosen_items,
@@ -1033,7 +1043,7 @@ lzx_write_compressed_block(int block_type,
        } else {
                lzx_write_bits(os, 0, 1);
 
-               if (max_window_size >= 65536)
+               if (window_order >= 16)
                        lzx_write_bits(os, block_size >> 16, 8);
 
                lzx_write_bits(os, block_size & 0xFFFF, 16);
@@ -1075,7 +1085,7 @@ lzx_write_all_blocks(struct lzx_compressor *c, struct lzx_output_bitstream *os)
 
                lzx_write_compressed_block(spec->block_type,
                                           spec->block_size,
-                                          c->max_window_size,
+                                          c->window_order,
                                           c->num_main_syms,
                                           spec->chosen_items,
                                           spec->num_chosen_items,
@@ -2254,13 +2264,17 @@ static void
 lzx_free_compressor(void *_c);
 
 static u64
-lzx_get_needed_memory(size_t max_window_size, unsigned int compression_level)
+lzx_get_needed_memory(size_t max_block_size, unsigned int compression_level)
 {
        struct lzx_compressor_params params;
        u64 size = 0;
+       unsigned window_order;
+       u32 max_window_size;
 
-       if (!lzx_window_size_valid(max_window_size))
+       window_order = lzx_get_window_order(max_block_size);
+       if (window_order == 0)
                return 0;
+       max_window_size = max_block_size;
 
        lzx_build_params(compression_level, max_window_size, &params);
 
@@ -2286,15 +2300,19 @@ lzx_get_needed_memory(size_t max_window_size, unsigned int compression_level)
 }
 
 static int
-lzx_create_compressor(size_t max_window_size, unsigned int compression_level,
+lzx_create_compressor(size_t max_block_size, unsigned int compression_level,
                      void **c_ret)
 {
        struct lzx_compressor *c;
        struct lzx_compressor_params params;
        struct lz_mf_params mf_params;
+       unsigned window_order;
+       u32 max_window_size;
 
-       if (!lzx_window_size_valid(max_window_size))
+       window_order = lzx_get_window_order(max_block_size);
+       if (window_order == 0)
                return WIMLIB_ERR_INVALID_PARAM;
+       max_window_size = max_block_size;
 
        lzx_build_params(compression_level, max_window_size, &params);
        lzx_build_mf_params(&params, max_window_size, &mf_params);
@@ -2306,8 +2324,9 @@ lzx_create_compressor(size_t max_window_size, unsigned int compression_level,
                goto oom;
 
        c->params = params;
-       c->num_main_syms = lzx_get_num_main_syms(max_window_size);
+       c->num_main_syms = lzx_get_num_main_syms(window_order);
        c->max_window_size = max_window_size;
+       c->window_order = window_order;
 
        c->cur_window = ALIGNED_MALLOC(max_window_size, 16);
        if (!c->cur_window)
index 0c95ac0..1e85327 100644 (file)
@@ -94,12 +94,11 @@ struct lzx_tables {
 /* The main LZX decompressor structure.
  *
  * Note: we keep track of most of the decompression state outside this
- * structure.  This structure only exists so that (1) we can store
- * @max_window_size and @num_main_syms for multiple calls to lzx_decompress();
- * and (2) so that we don't have to allocate the large 'struct lzx_tables' on
- * the stack.  */
+ * structure.  This structure only exists so that (1) we can store @window_order
+ * and @num_main_syms for multiple calls to lzx_decompress(); and (2) so that we
+ * don't have to allocate the large 'struct lzx_tables' on the stack.  */
 struct lzx_decompressor {
-       u32 max_window_size;
+       unsigned window_order;
        unsigned num_main_syms;
        struct lzx_tables tables;
 };
@@ -265,7 +264,7 @@ lzx_read_codeword_lens(struct input_bitstream *istream, u8 lens[], unsigned num_
 static int
 lzx_read_block_header(struct input_bitstream *istream,
                      unsigned num_main_syms,
-                     u32 max_window_size,
+                     unsigned window_order,
                      int *block_type_ret,
                      u32 *block_size_ret,
                      struct lzx_tables *tables,
@@ -296,7 +295,7 @@ lzx_read_block_header(struct input_bitstream *istream,
                block_size <<= 8;
                block_size |= tmp;
 
-               if (max_window_size >= 65536) {
+               if (window_order >= 16) {
                        tmp = bitstream_read_bits(istream, 8);
                        block_size <<= 8;
                        block_size |= tmp;
@@ -593,9 +592,6 @@ lzx_decompress(const void *compressed_data, size_t compressed_size,
        bool may_have_e8_byte;
        int ret;
 
-       if (uncompressed_size > dec->max_window_size)
-               return -1;
-
        init_input_bitstream(&istream, compressed_data, compressed_size);
 
        /* Initialize the recent offsets queue.  */
@@ -619,7 +615,7 @@ lzx_decompress(const void *compressed_data, size_t compressed_size,
             window_pos += block_size)
        {
                ret = lzx_read_block_header(&istream, dec->num_main_syms,
-                                           dec->max_window_size, &block_type,
+                                           dec->window_order, &block_type,
                                            &block_size, &dec->tables, &queue);
                if (ret)
                        return ret;
@@ -683,11 +679,13 @@ lzx_free_decompressor(void *_dec)
 }
 
 static int
-lzx_create_decompressor(size_t max_window_size, void **dec_ret)
+lzx_create_decompressor(size_t max_block_size, void **dec_ret)
 {
        struct lzx_decompressor *dec;
+       unsigned window_order;
 
-       if (!lzx_window_size_valid(max_window_size))
+       window_order = lzx_get_window_order(max_block_size);
+       if (window_order == 0)
                return WIMLIB_ERR_INVALID_PARAM;
 
        /* The aligned allocation is needed to ensure that the lzx_tables are
@@ -697,8 +695,8 @@ lzx_create_decompressor(size_t max_window_size, void **dec_ret)
        if (!dec)
                return WIMLIB_ERR_NOMEM;
 
-       dec->max_window_size = max_window_size;
-       dec->num_main_syms = lzx_get_num_main_syms(max_window_size);
+       dec->window_order = window_order;
+       dec->num_main_syms = lzx_get_num_main_syms(window_order);
 
        *dec_ret = dec;
        return 0;