]> wimlib.net Git - wimlib/blobdiff - src/xpress_compress.c
Rename 'pos_t' to 'mf_pos_t'
[wimlib] / src / xpress_compress.c
index 04105ae06fb53c319f1e5a6c0f21091c2e8a9b00..b57d0ea62355157dbf72a16c2f8c75bd503e2ab4 100644 (file)
 #define MIN_LEVEL_FOR_NEAR_OPTIMAL     60
 
 /*
- * The window order for the matchfinder.  This must be the base 2 logarithm of
- * the maximum buffer size.
+ * Matchfinder definitions.  For XPRESS, only a 16-bit matchfinder is needed.
  */
-#define MATCHFINDER_WINDOW_ORDER       16
+#define mf_pos_t       u16
+#define MF_SUFFIX
 
 /*
- * Although XPRESS can potentially use a sliding window, it isn't well suited
- * for large buffers of data because there is no way to reset the Huffman code.
- * Therefore, we only allow buffers in which there is no restriction on match
- * offsets (no sliding window).  This simplifies the code and allows some
+ * Note: although XPRESS can potentially use a sliding window, it isn't well
+ * suited for large buffers of data because there is no way to reset the Huffman
+ * code.  Therefore, we only allow buffers in which there is no restriction on
+ * match offsets (no sliding window).  This simplifies the code and allows some
  * optimizations.
  */
-#define MATCHFINDER_IS_SLIDING         0
-
-#include <string.h>
 
 #include "wimlib/bitops.h"
 #include "wimlib/compress_common.h"
@@ -122,21 +119,21 @@ struct xpress_compressor {
        union {
                /* Data for greedy or lazy parsing  */
                struct {
-                       struct hc_matchfinder hc_mf;
                        struct xpress_item *chosen_items;
-                       u8 nonoptimal_end[0];
+                       struct hc_matchfinder hc_mf;
+                       /* hc_mf must be last!  */
                };
 
        #if SUPPORT_NEAR_OPTIMAL_PARSING
                /* Data for near-optimal parsing  */
                struct {
-                       struct bt_matchfinder bt_mf;
                        struct xpress_optimum_node *optimum_nodes;
                        struct lz_match *match_cache;
                        struct lz_match *cache_overflow_mark;
                        unsigned num_optim_passes;
                        u32 costs[XPRESS_NUM_SYMBOLS];
-                       u8 optimal_end[0];
+                       struct bt_matchfinder bt_mf;
+                       /* bt_mf must be last!  */
                };
        #endif
        };
@@ -527,11 +524,12 @@ xpress_compress_greedy(struct xpress_compressor * restrict c,
                       const void * restrict in, size_t in_nbytes,
                       void * restrict out, size_t out_nbytes_avail)
 {
-       const u8 * const in_base = in;
-       const u8 *       in_next = in_base;
-       const u8 * const in_end = in_base + in_nbytes;
+       const u8 * const in_begin = in;
+       const u8 *       in_next = in_begin;
+       const u8 * const in_end = in_begin + in_nbytes;
        struct xpress_item *next_chosen_item = c->chosen_items;
        unsigned len_3_too_far;
+       u32 next_hashes[2] = {};
 
        if (in_nbytes <= 8192)
                len_3_too_far = 2048;
@@ -545,12 +543,13 @@ xpress_compress_greedy(struct xpress_compressor * restrict c,
                unsigned offset;
 
                length = hc_matchfinder_longest_match(&c->hc_mf,
-                                                     in_base,
-                                                     in_next,
+                                                     in_begin,
+                                                     in_next - in_begin,
                                                      XPRESS_MIN_MATCH_LEN - 1,
                                                      in_end - in_next,
                                                      min(in_end - in_next, c->nice_match_length),
                                                      c->max_search_depth,
+                                                     next_hashes,
                                                      &offset);
                if (length >= XPRESS_MIN_MATCH_LEN &&
                    !(length == XPRESS_MIN_MATCH_LEN && offset >= len_3_too_far))
@@ -560,10 +559,11 @@ xpress_compress_greedy(struct xpress_compressor * restrict c,
                                xpress_record_match(c, length, offset);
                        in_next += 1;
                        hc_matchfinder_skip_positions(&c->hc_mf,
-                                                     in_base,
-                                                     in_next,
-                                                     in_end,
-                                                     length - 1);
+                                                     in_begin,
+                                                     in_next - in_begin,
+                                                     in_end - in_begin,
+                                                     length - 1,
+                                                     next_hashes);
                        in_next += length - 1;
                } else {
                        /* No match found  */
@@ -587,11 +587,12 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                     const void * restrict in, size_t in_nbytes,
                     void * restrict out, size_t out_nbytes_avail)
 {
-       const u8 * const in_base = in;
-       const u8 *       in_next = in_base;
-       const u8 * const in_end = in_base + in_nbytes;
+       const u8 * const in_begin = in;
+       const u8 *       in_next = in_begin;
+       const u8 * const in_end = in_begin + in_nbytes;
        struct xpress_item *next_chosen_item = c->chosen_items;
        unsigned len_3_too_far;
+       u32 next_hashes[2] = {};
 
        if (in_nbytes <= 8192)
                len_3_too_far = 2048;
@@ -608,12 +609,13 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
 
                /* Find the longest match at the current position.  */
                cur_len = hc_matchfinder_longest_match(&c->hc_mf,
-                                                      in_base,
-                                                      in_next,
+                                                      in_begin,
+                                                      in_next - in_begin,
                                                       XPRESS_MIN_MATCH_LEN - 1,
                                                       in_end - in_next,
                                                       min(in_end - in_next, c->nice_match_length),
                                                       c->max_search_depth,
+                                                      next_hashes,
                                                       &cur_offset);
                in_next += 1;
 
@@ -637,10 +639,11 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                                xpress_record_match(c, cur_len, cur_offset);
 
                        hc_matchfinder_skip_positions(&c->hc_mf,
-                                                     in_base,
-                                                     in_next,
-                                                     in_end,
-                                                     cur_len - 1);
+                                                     in_begin,
+                                                     in_next - in_begin,
+                                                     in_end - in_begin,
+                                                     cur_len - 1,
+                                                     next_hashes);
                        in_next += cur_len - 1;
                        continue;
                }
@@ -662,12 +665,13 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                 * longest_match() inlined at each.
                 */
                next_len = hc_matchfinder_longest_match(&c->hc_mf,
-                                                       in_base,
-                                                       in_next,
+                                                       in_begin,
+                                                       in_next - in_begin,
                                                        cur_len,
                                                        in_end - in_next,
                                                        min(in_end - in_next, c->nice_match_length),
                                                        c->max_search_depth / 2,
+                                                       next_hashes,
                                                        &next_offset);
                in_next += 1;
 
@@ -685,10 +689,11 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                        *next_chosen_item++ =
                                xpress_record_match(c, cur_len, cur_offset);
                        hc_matchfinder_skip_positions(&c->hc_mf,
-                                                     in_base,
-                                                     in_next,
-                                                     in_end,
-                                                     cur_len - 2);
+                                                     in_begin,
+                                                     in_next - in_begin,
+                                                     in_end - in_begin,
+                                                     cur_len - 2,
+                                                     next_hashes);
                        in_next += cur_len - 2;
                        continue;
                }
@@ -899,16 +904,17 @@ static struct lz_match *
 xpress_find_matches(struct xpress_compressor * restrict c,
                    const void * restrict in, size_t in_nbytes)
 {
-       const u8 * const in_base = in;
-       const u8 *in_next = in_base;
-       const u8 * const in_end = in_base + in_nbytes;
+       const u8 * const in_begin = in;
+       const u8 *in_next = in_begin;
+       const u8 * const in_end = in_begin + in_nbytes;
        struct lz_match *cache_ptr = c->match_cache;
-       unsigned long prev_hash = 0;
+       u32 next_hash = 0;
 
        bt_matchfinder_init(&c->bt_mf);
 
        do {
-               unsigned num_matches;
+               struct lz_match *matches;
+               unsigned best_len;
 
                /* If we've found so many matches that the cache might overflow
                 * if we keep finding more, then stop finding matches.  This
@@ -922,56 +928,52 @@ xpress_find_matches(struct xpress_compressor * restrict c,
                        return cache_ptr;
                }
 
+               matches = cache_ptr;
+
                /* Find matches with the current position using the binary tree
                 * matchfinder and save them in the next available slots in
                 * the match cache.  */
-               num_matches =
+               cache_ptr =
                        bt_matchfinder_get_matches(&c->bt_mf,
-                                                  in_base,
-                                                  in_next,
-                                                  XPRESS_MIN_MATCH_LEN,
+                                                  in_begin,
+                                                  in_next - in_begin,
                                                   in_end - in_next,
                                                   min(in_end - in_next, c->nice_match_length),
                                                   c->max_search_depth,
-                                                  &prev_hash,
+                                                  &next_hash,
+                                                  &best_len,
                                                   cache_ptr);
-               cache_ptr += num_matches;
-               cache_ptr->length = num_matches;
+               cache_ptr->length = cache_ptr - matches;
                cache_ptr->offset = *in_next;
                in_next++;
                cache_ptr++;
 
-               if (num_matches) {
-                       /*
-                        * If there was a very long match found, then don't
-                        * cache any matches for the bytes covered by that
-                        * match.  This avoids degenerate behavior when
-                        * compressing highly redundant data, where the number
-                        * of matches can be very large.
-                        *
-                        * This heuristic doesn't actually hurt the compression
-                        * ratio very much.  If there's a long match, then the
-                        * data must be highly compressible, so it doesn't
-                        * matter as much what we do.
-                        */
-                       unsigned best_len = cache_ptr[-2].length;
-                       if (best_len >= c->nice_match_length) {
-                               --best_len;
-                               do {
-                                       bt_matchfinder_skip_position(&c->bt_mf,
-                                                                    in_base,
-                                                                    in_next,
-                                                                    in_end,
-                                                                    min(in_end - in_next,
-                                                                        c->nice_match_length),
-                                                                    c->max_search_depth,
-                                                                    &prev_hash);
-
-                                       cache_ptr->length = 0;
-                                       cache_ptr->offset = *in_next++;
-                                       cache_ptr++;
-                               } while (--best_len);
-                       }
+               /*
+                * If there was a very long match found, then don't cache any
+                * matches for the bytes covered by that match.  This avoids
+                * degenerate behavior when compressing highly redundant data,
+                * where the number of matches can be very large.
+                *
+                * This heuristic doesn't actually hurt the compression ratio
+                * very much.  If there's a long match, then the data must be
+                * highly compressible, so it doesn't matter as much what we do.
+                */
+               if (best_len >= c->nice_match_length) {
+                       --best_len;
+                       do {
+                               bt_matchfinder_skip_position(&c->bt_mf,
+                                                            in_begin,
+                                                            in_next - in_begin,
+                                                            in_end - in_next,
+                                                            min(in_end - in_next,
+                                                                c->nice_match_length),
+                                                            c->max_search_depth,
+                                                            &next_hash);
+
+                               cache_ptr->length = 0;
+                               cache_ptr->offset = *in_next++;
+                               cache_ptr++;
+                       } while (--best_len);
                }
        } while (in_next != in_end);
 
@@ -1019,23 +1021,40 @@ xpress_compress_near_optimal(struct xpress_compressor * restrict c,
 
 #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
 
+static size_t
+xpress_get_compressor_size(size_t max_bufsize, unsigned compression_level)
+{
+#if SUPPORT_NEAR_OPTIMAL_PARSING
+       if (compression_level >= MIN_LEVEL_FOR_NEAR_OPTIMAL)
+               return offsetof(struct xpress_compressor, bt_mf) +
+                       bt_matchfinder_size(max_bufsize);
+#endif
+
+       return offsetof(struct xpress_compressor, hc_mf) +
+               hc_matchfinder_size(max_bufsize);
+}
+
 static u64
-xpress_get_needed_memory(size_t max_bufsize, unsigned compression_level)
+xpress_get_needed_memory(size_t max_bufsize, unsigned compression_level,
+                        bool destructive)
 {
-       size_t size = 0;
+       u64 size = 0;
 
        if (max_bufsize > XPRESS_MAX_BUFSIZE)
                return 0;
 
+       size += xpress_get_compressor_size(max_bufsize, compression_level);
+
        if (compression_level < MIN_LEVEL_FOR_NEAR_OPTIMAL ||
            !SUPPORT_NEAR_OPTIMAL_PARSING) {
-               size += offsetof(struct xpress_compressor, nonoptimal_end);
+               /* chosen_items  */
                size += max_bufsize * sizeof(struct xpress_item);
        }
 #if SUPPORT_NEAR_OPTIMAL_PARSING
        else {
-               size += offsetof(struct xpress_compressor, optimal_end);
+               /* optimum_nodes  */
                size += (max_bufsize + 1) * sizeof(struct xpress_optimum_node);
+               /* match_cache */
                size += ((max_bufsize * CACHE_RESERVE_PER_POS) +
                         XPRESS_MAX_MATCH_LEN + max_bufsize) *
                                sizeof(struct lz_match);
@@ -1046,56 +1065,44 @@ xpress_get_needed_memory(size_t max_bufsize, unsigned compression_level)
 
 static int
 xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
-                        void **c_ret)
+                        bool destructive, void **c_ret)
 {
        struct xpress_compressor *c;
 
        if (max_bufsize > XPRESS_MAX_BUFSIZE)
                return WIMLIB_ERR_INVALID_PARAM;
 
-       if (compression_level < 30) {
-               c = ALIGNED_MALLOC(offsetof(struct xpress_compressor,
-                                           nonoptimal_end),
-                                  MATCHFINDER_ALIGNMENT);
-               if (!c)
-                       return WIMLIB_ERR_NOMEM;
-               c->impl = xpress_compress_greedy;
-               c->max_search_depth = (compression_level * 24) / 16;
-               c->nice_match_length = (compression_level * 48) / 16;
-               c->chosen_items = MALLOC(max_bufsize * sizeof(struct xpress_item));
-               if (!c->chosen_items) {
-                       ALIGNED_FREE(c);
-                       return WIMLIB_ERR_NOMEM;
-               }
-       } else if (compression_level < MIN_LEVEL_FOR_NEAR_OPTIMAL ||
-                  !SUPPORT_NEAR_OPTIMAL_PARSING)
+       c = MALLOC(xpress_get_compressor_size(max_bufsize, compression_level));
+       if (!c)
+               goto oom0;
+
+       if (compression_level < MIN_LEVEL_FOR_NEAR_OPTIMAL ||
+           !SUPPORT_NEAR_OPTIMAL_PARSING)
        {
-               c = ALIGNED_MALLOC(offsetof(struct xpress_compressor,
-                                           nonoptimal_end),
-                                  MATCHFINDER_ALIGNMENT);
-               if (!c)
-                       return WIMLIB_ERR_NOMEM;
-
-               c->impl = xpress_compress_lazy;
-               c->max_search_depth = (compression_level * 24) / 32;
-               c->nice_match_length = (compression_level * 48) / 32;
+
                c->chosen_items = MALLOC(max_bufsize * sizeof(struct xpress_item));
-               if (!c->chosen_items) {
-                       ALIGNED_FREE(c);
-                       return WIMLIB_ERR_NOMEM;
+               if (!c->chosen_items)
+                       goto oom1;
+
+               if (compression_level < 30) {
+                       c->impl = xpress_compress_greedy;
+                       c->max_search_depth = (compression_level * 24) / 16;
+                       c->nice_match_length = (compression_level * 48) / 16;
+               } else {
+                       c->impl = xpress_compress_lazy;
+                       c->max_search_depth = (compression_level * 24) / 32;
+                       c->nice_match_length = (compression_level * 48) / 32;
+
+                       /* xpress_compress_lazy() needs max_search_depth >= 2
+                        * because it halves the max_search_depth when
+                        * attempting a lazy match, and max_search_depth cannot
+                        * be 0.  */
+                       if (c->max_search_depth < 2)
+                               c->max_search_depth = 2;
                }
        }
 #if SUPPORT_NEAR_OPTIMAL_PARSING
        else {
-               c = ALIGNED_MALLOC(offsetof(struct xpress_compressor,
-                                           optimal_end),
-                                  MATCHFINDER_ALIGNMENT);
-               if (!c)
-                       return WIMLIB_ERR_NOMEM;
-               c->impl = xpress_compress_near_optimal;
-               c->max_search_depth = (compression_level * 32) / 100;
-               c->nice_match_length = (compression_level * 50) / 100;
-               c->num_optim_passes = compression_level / 40;
 
                c->optimum_nodes = MALLOC((max_bufsize + 1) *
                                          sizeof(struct xpress_optimum_node));
@@ -1105,24 +1112,41 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
                if (!c->optimum_nodes || !c->match_cache) {
                        FREE(c->optimum_nodes);
                        FREE(c->match_cache);
-                       ALIGNED_FREE(c);
-                       return WIMLIB_ERR_NOMEM;
+                       goto oom1;
                }
                c->cache_overflow_mark =
                        &c->match_cache[max_bufsize * CACHE_RESERVE_PER_POS];
+
+               c->impl = xpress_compress_near_optimal;
+               c->max_search_depth = (compression_level * 32) / 100;
+               c->nice_match_length = (compression_level * 50) / 100;
+               c->num_optim_passes = compression_level / 40;
        }
 #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
 
+       /* max_search_depth == 0 is invalid.  */
+       if (c->max_search_depth < 1)
+               c->max_search_depth = 1;
+
        *c_ret = c;
        return 0;
+
+oom1:
+       FREE(c);
+oom0:
+       return WIMLIB_ERR_NOMEM;
 }
 
 static size_t
-xpress_compress(const void *in, size_t in_nbytes,
-               void *out, size_t out_nbytes_avail, void *_c)
+xpress_compress(const void *restrict in, size_t in_nbytes,
+               void *restrict out, size_t out_nbytes_avail, void *restrict _c)
 {
        struct xpress_compressor *c = _c;
 
+       /* Don't bother trying to compress very small inputs.  */
+       if (in_nbytes < 25)
+               return 0;
+
        if (out_nbytes_avail <= XPRESS_NUM_SYMBOLS / 2 + 4)
                return 0;
 
@@ -1136,16 +1160,14 @@ xpress_free_compressor(void *_c)
 {
        struct xpress_compressor *c = _c;
 
-       if (c) {
-       #if SUPPORT_NEAR_OPTIMAL_PARSING
-               if (c->impl == xpress_compress_near_optimal) {
-                       FREE(c->optimum_nodes);
-                       FREE(c->match_cache);
-               } else
-       #endif
-                       FREE(c->chosen_items);
-               ALIGNED_FREE(c);
-       }
+#if SUPPORT_NEAR_OPTIMAL_PARSING
+       if (c->impl == xpress_compress_near_optimal) {
+               FREE(c->optimum_nodes);
+               FREE(c->match_cache);
+       } else
+#endif
+               FREE(c->chosen_items);
+       FREE(c);
 }
 
 const struct compressor_ops xpress_compressor_ops = {