Support "destructive" compression to save memory
authorEric Biggers <ebiggers3@gmail.com>
Sun, 15 Feb 2015 00:30:24 +0000 (18:30 -0600)
committerEric Biggers <ebiggers3@gmail.com>
Sun, 15 Feb 2015 01:17:16 +0000 (19:17 -0600)
include/wimlib.h
include/wimlib/compressor_ops.h
src/compress.c
src/compress_parallel.c
src/compress_serial.c
src/lzms_compress.c
src/lzx_compress.c
src/xpress_compress.c

index a7b18489ef178c67f426e0d6e84251598627d063..c2ecb48dffdf8e11e5ab7ef94c96cb2579d40d7f 100644 (file)
@@ -4624,6 +4624,8 @@ wimlib_get_compressor_needed_memory(enum wimlib_compression_type ctype,
                                    size_t max_block_size,
                                    unsigned int compression_level);
 
+#define WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE     0x80000000
+
 /**
  * Allocate a compressor for the specified compression type using the specified
  * parameters.  This function is part of wimlib's compression API; it is not
@@ -4668,6 +4670,15 @@ wimlib_get_compressor_needed_memory(enum wimlib_compression_type ctype,
  *     The compression level does not affect the format of the compressed data.
  *     Therefore, it is a compressor-only parameter and does not need to be
  *     passed to the decompressor.
+ *     <br/>
+ *     Since wimlib v1.7.5, this parameter can be OR-ed with the flag
+ *     ::WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE.  This creates the compressor in a
+ *     mode where it is allowed to modify the input buffer.  Specifically, in
+ *     this mode, if compression succeeds, the input buffer may be left
+ *     modified; if compression does not succeed, the input buffer may have
+ *     been written to in the meantime, but it will have been restored exactly
+ *     to its original state.  This mode is designed to save some memory when
+ *     using large buffer sizes.
  * @param compressor_ret
  *     A location into which to return the pointer to the allocated compressor.
  *     The allocated compressor can be used for any number of calls to
index af81546c8230e3c32a9a21055edb6e56ac89f02c..bef5cf6d1b440d9f7d6e5b91adaf8052e530c4ee 100644 (file)
 struct compressor_ops {
 
        u64 (*get_needed_memory)(size_t max_block_size,
-                                unsigned int compression_level);
+                                unsigned int compression_level,
+                                bool destructive);
 
        int (*create_compressor)(size_t max_block_size,
                                 unsigned int compression_level,
+                                bool destructive,
                                 void **private_ret);
 
        size_t (*compress)(const void *uncompressed_data,
index 47026e368d0b256afd1829fdbdc01a49e99be5b9..ff28ebc0f79d915701a9fdb0ef68ac47e3fbda8c 100644 (file)
@@ -81,12 +81,19 @@ wimlib_get_compressor_needed_memory(enum wimlib_compression_type ctype,
                                    size_t max_block_size,
                                    unsigned int compression_level)
 {
+       bool destructive;
        const struct compressor_ops *ops;
        u64 size;
 
+       destructive = (compression_level & WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE);
+       compression_level &= ~WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE;
+
        if (!compressor_ctype_valid(ctype))
                return 0;
 
+       if (compression_level > 0xFFFFFF)
+               return 0;
+
        if (max_block_size == 0)
                return 0;
 
@@ -98,7 +105,8 @@ wimlib_get_compressor_needed_memory(enum wimlib_compression_type ctype,
                compression_level = DEFAULT_COMPRESSION_LEVEL;
 
        if (ops->get_needed_memory) {
-               size = ops->get_needed_memory(max_block_size, compression_level);
+               size = ops->get_needed_memory(max_block_size, compression_level,
+                                             destructive);
 
                /* 0 is never valid and indicates an invalid max_block_size.  */
                if (size == 0)
@@ -115,11 +123,18 @@ wimlib_create_compressor(enum wimlib_compression_type ctype,
                         unsigned int compression_level,
                         struct wimlib_compressor **c_ret)
 {
+       bool destructive;
        struct wimlib_compressor *c;
 
+       destructive = (compression_level & WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE);
+       compression_level &= ~WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE;
+
        if (!compressor_ctype_valid(ctype))
                return WIMLIB_ERR_INVALID_COMPRESSION_TYPE;
 
+       if (compression_level > 0xFFFFFF)
+               return WIMLIB_ERR_INVALID_PARAM;
+
        if (c_ret == NULL)
                return WIMLIB_ERR_INVALID_PARAM;
 
@@ -143,6 +158,7 @@ wimlib_create_compressor(enum wimlib_compression_type ctype,
 
                ret = c->ops->create_compressor(max_block_size,
                                                compression_level,
+                                               destructive,
                                                &c->private);
                if (ret) {
                        FREE(c);
index b3377ffc985ca636ce6d1746b7c939ec7b349524..0569b176fcbf2a9842585c7b6e0962bc3d8cf490 100644 (file)
@@ -523,7 +523,8 @@ new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
 
                dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
-               ret = wimlib_create_compressor(out_ctype, out_chunk_size, 0,
+               ret = wimlib_create_compressor(out_ctype, out_chunk_size,
+                                              WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE,
                                               &dat->compressor);
                if (ret)
                        goto err;
index 35470ec531a6ae40bf72d43b91f1a9315fbe9658..6bac63ec8f9b2037bf720c82d69649264eddc006 100644 (file)
@@ -127,7 +127,8 @@ new_serial_chunk_compressor(int out_ctype, u32 out_chunk_size,
        ctx->base.get_compression_result = serial_chunk_compressor_get_compression_result;
 
        ret = wimlib_create_compressor(out_ctype, out_chunk_size,
-                                      0, &ctx->compressor);
+                                      WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE,
+                                      &ctx->compressor);
        if (ret)
                goto err;
 
index abf39f5e89f194d5ff0fb087b28a3c4c73b57100..caa93130c75ca44f656a1d8464621985d6eb3bd1 100644 (file)
@@ -287,6 +287,10 @@ struct lzms_compressor {
         */
        bool use_delta_matches;
 
+       /* If true, the compressor need not preserve the input buffer if it
+        * compresses the data successfully.  */
+       bool destructive;
+
        /* 'last_target_usages' is a large array that is only needed for
         * preprocessing, so it is in union with fields that don't need to be
         * initialized until after preprocessing.  */
@@ -2100,7 +2104,8 @@ lzms_finalize(struct lzms_compressor *c)
 }
 
 static u64
-lzms_get_needed_memory(size_t max_bufsize, unsigned compression_level)
+lzms_get_needed_memory(size_t max_bufsize, unsigned compression_level,
+                      bool destructive)
 {
        u64 size = 0;
 
@@ -2109,8 +2114,8 @@ lzms_get_needed_memory(size_t max_bufsize, unsigned compression_level)
 
        size += sizeof(struct lzms_compressor);
 
-       /* in_buffer */
-       size += max_bufsize;
+       if (!destructive)
+               size += max_bufsize; /* in_buffer */
 
        /* mf */
        size += lcpit_matchfinder_get_needed_memory(max_bufsize);
@@ -2120,7 +2125,7 @@ lzms_get_needed_memory(size_t max_bufsize, unsigned compression_level)
 
 static int
 lzms_create_compressor(size_t max_bufsize, unsigned compression_level,
-                      void **c_ret)
+                      bool destructive, void **c_ret)
 {
        struct lzms_compressor *c;
        u32 nice_match_len;
@@ -2132,6 +2137,8 @@ lzms_create_compressor(size_t max_bufsize, unsigned compression_level,
        if (!c)
                goto oom0;
 
+       c->destructive = destructive;
+
        /* Scale nice_match_len with the compression level.  But to allow an
         * optimization for length cost calculations, don't allow nice_match_len
         * to exceed MAX_FAST_LENGTH.  */
@@ -2142,9 +2149,11 @@ lzms_create_compressor(size_t max_bufsize, unsigned compression_level,
        c->try_lit_lzrep0 = (compression_level >= 60);
        c->try_lzrep_lit_lzrep0 = (compression_level >= 60);
 
-       c->in_buffer = MALLOC(max_bufsize);
-       if (!c->in_buffer)
-               goto oom1;
+       if (!c->destructive) {
+               c->in_buffer = MALLOC(max_bufsize);
+               if (!c->in_buffer)
+                       goto oom1;
+       }
 
        if (!lcpit_matchfinder_init(&c->mf, max_bufsize, 2, nice_match_len))
                goto oom2;
@@ -2156,7 +2165,8 @@ lzms_create_compressor(size_t max_bufsize, unsigned compression_level,
        return 0;
 
 oom2:
-       FREE(c->in_buffer);
+       if (!c->destructive)
+               FREE(c->in_buffer);
 oom1:
        ALIGNED_FREE(c);
 oom0:
@@ -2168,13 +2178,17 @@ lzms_compress(const void *in, size_t in_nbytes,
              void *out, size_t out_nbytes_avail, void *_c)
 {
        struct lzms_compressor *c = _c;
+       size_t result;
 
        /* Don't bother trying to compress extremely small inputs.  */
        if (in_nbytes < 4)
                return 0;
 
        /* Copy the input data into the internal buffer and preprocess it.  */
-       memcpy(c->in_buffer, in, in_nbytes);
+       if (c->destructive)
+               c->in_buffer = (void *)in;
+       else
+               memcpy(c->in_buffer, in, in_nbytes);
        c->in_nbytes = in_nbytes;
        lzms_x86_filter(c->in_buffer, in_nbytes, c->last_target_usages, false);
 
@@ -2187,13 +2201,16 @@ lzms_compress(const void *in, size_t in_nbytes,
        lzms_range_encoder_init(&c->rc, out, out_nbytes_avail / sizeof(le16));
        lzms_output_bitstream_init(&c->os, out, out_nbytes_avail / sizeof(le16));
        lzms_init_states_and_probabilities(c);
-       lzms_init_huffman_codes(c, lzms_get_num_offset_slots(in_nbytes));
+       lzms_init_huffman_codes(c, lzms_get_num_offset_slots(c->in_nbytes));
 
        /* The main loop: parse and encode.  */
        lzms_near_optimal_parse(c);
 
        /* Return the compressed data size or 0.  */
-       return lzms_finalize(c);
+       result = lzms_finalize(c);
+       if (!result && c->destructive)
+               lzms_x86_filter(c->in_buffer, c->in_nbytes, c->last_target_usages, true);
+       return result;
 }
 
 static void
@@ -2201,7 +2218,8 @@ lzms_free_compressor(void *_c)
 {
        struct lzms_compressor *c = _c;
 
-       FREE(c->in_buffer);
+       if (!c->destructive)
+               FREE(c->in_buffer);
        lcpit_matchfinder_destroy(&c->mf);
        ALIGNED_FREE(c);
 }
index 075a65a1e5064541646f1c263c70de10d0d70e39..1506b8842ca567533e1fafea263edaaac89fe8ff 100644 (file)
@@ -368,6 +368,10 @@ struct lzx_compressor {
        /* Pointer to the compress() implementation chosen at allocation time */
        void (*impl)(struct lzx_compressor *, struct lzx_output_bitstream *);
 
+       /* If true, the compressor need not preserve the input buffer if it
+        * compresses the data successfully.  */
+       bool destructive;
+
        /* The Huffman symbol frequency counters for the current block.  */
        struct lzx_freqs freqs;
 
@@ -2010,7 +2014,8 @@ lzx_get_compressor_size(size_t max_bufsize, unsigned compression_level)
 }
 
 static u64
-lzx_get_needed_memory(size_t max_bufsize, unsigned compression_level)
+lzx_get_needed_memory(size_t max_bufsize, unsigned compression_level,
+                     bool destructive)
 {
        u64 size = 0;
 
@@ -2018,13 +2023,14 @@ lzx_get_needed_memory(size_t max_bufsize, unsigned compression_level)
                return 0;
 
        size += lzx_get_compressor_size(max_bufsize, compression_level);
-       size += max_bufsize; /* in_buffer */
+       if (!destructive)
+               size += max_bufsize; /* in_buffer */
        return size;
 }
 
 static int
 lzx_create_compressor(size_t max_bufsize, unsigned compression_level,
-                     void **c_ret)
+                     bool destructive, void **c_ret)
 {
        unsigned window_order;
        struct lzx_compressor *c;
@@ -2039,12 +2045,16 @@ lzx_create_compressor(size_t max_bufsize, unsigned compression_level,
        if (!c)
                goto oom0;
 
+       c->destructive = destructive;
+
        c->num_main_syms = lzx_get_num_main_syms(window_order);
        c->window_order = window_order;
 
-       c->in_buffer = MALLOC(max_bufsize);
-       if (!c->in_buffer)
-               goto oom1;
+       if (!c->destructive) {
+               c->in_buffer = MALLOC(max_bufsize);
+               if (!c->in_buffer)
+                       goto oom1;
+       }
 
        if (compression_level <= LZX_MAX_FAST_LEVEL) {
 
@@ -2117,13 +2127,17 @@ lzx_compress(const void *in, size_t in_nbytes,
 {
        struct lzx_compressor *c = _c;
        struct lzx_output_bitstream os;
+       size_t result;
 
        /* Don't bother trying to compress very small inputs.  */
        if (in_nbytes < 100)
                return 0;
 
        /* Copy the input data into the internal buffer and preprocess it.  */
-       memcpy(c->in_buffer, in, in_nbytes);
+       if (c->destructive)
+               c->in_buffer = (void *)in;
+       else
+               memcpy(c->in_buffer, in, in_nbytes);
        c->in_nbytes = in_nbytes;
        lzx_do_e8_preprocessing(c->in_buffer, in_nbytes);
 
@@ -2138,7 +2152,10 @@ lzx_compress(const void *in, size_t in_nbytes,
        (*c->impl)(c, &os);
 
        /* Flush the output bitstream and return the compressed size or 0.  */
-       return lzx_flush_output(&os);
+       result = lzx_flush_output(&os);
+       if (!result && c->destructive)
+               lzx_undo_e8_preprocessing(c->in_buffer, c->in_nbytes);
+       return result;
 }
 
 static void
@@ -2146,7 +2163,8 @@ lzx_free_compressor(void *_c)
 {
        struct lzx_compressor *c = _c;
 
-       FREE(c->in_buffer);
+       if (!c->destructive)
+               FREE(c->in_buffer);
        ALIGNED_FREE(c);
 }
 
index 1c824a57634520cdd3974e32ef88cfbe47cf7aa6..cf29df99268d2801e97cdab246c44392d1bfdea9 100644 (file)
@@ -1031,7 +1031,8 @@ xpress_get_compressor_size(size_t max_bufsize, unsigned compression_level)
 }
 
 static u64
-xpress_get_needed_memory(size_t max_bufsize, unsigned compression_level)
+xpress_get_needed_memory(size_t max_bufsize, unsigned compression_level,
+                        bool destructive)
 {
        u64 size = 0;
 
@@ -1060,7 +1061,7 @@ xpress_get_needed_memory(size_t max_bufsize, unsigned compression_level)
 
 static int
 xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
-                        void **c_ret)
+                        bool destructive, void **c_ret)
 {
        struct xpress_compressor *c;