]> wimlib.net Git - wimlib/blobdiff - src/xpress_compress.c
bt_matchfinder: use 4-byte hashing for trees
[wimlib] / src / xpress_compress.c
index 65c4b026627bdeaf912b29e04cdba435779f7cef..776de7cdc4c33032f82a75d7d4333f041df36217 100644 (file)
 #define MIN_LEVEL_FOR_NEAR_OPTIMAL     60
 
 /*
- * The maximum window order for the matchfinder.  This must be the base 2
- * logarithm of the maximum buffer size.
+ * Matchfinder definitions.  For XPRESS, only a 16-bit matchfinder is needed.
  */
-#define MATCHFINDER_MAX_WINDOW_ORDER   16
+#define mf_pos_t       u16
+#define MF_SUFFIX
 
 /*
  * Note: although XPRESS can potentially use a sliding window, it isn't well
@@ -59,8 +59,6 @@
  * optimizations.
  */
 
-#include <string.h>
-
 #include "wimlib/bitops.h"
 #include "wimlib/compress_common.h"
 #include "wimlib/compressor_ops.h"
@@ -396,11 +394,11 @@ xpress_write_item_list(struct xpress_output_bitstream *os,
                       struct xpress_optimum_node *optimum_nodes,
                       size_t count, const u32 codewords[], const u8 lens[])
 {
-       struct xpress_optimum_node *cur_optimum_ptr = optimum_nodes;
-       struct xpress_optimum_node *end_optimum_ptr = optimum_nodes + count;
+       struct xpress_optimum_node *cur_node = optimum_nodes;
+       struct xpress_optimum_node *end_node = optimum_nodes + count;
        do {
-               unsigned length = cur_optimum_ptr->item & OPTIMUM_LEN_MASK;
-               unsigned offset = cur_optimum_ptr->item >> OPTIMUM_OFFSET_SHIFT;
+               unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+               unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
 
                if (length == 1) {
                        /* Literal  */
@@ -410,22 +408,22 @@ xpress_write_item_list(struct xpress_output_bitstream *os,
                } else {
                        /* Match  */
                        unsigned adjusted_len;
-                       unsigned offset_high_bit;
+                       unsigned log2_offset;
                        unsigned len_hdr;
                        unsigned sym;
 
                        adjusted_len = length - XPRESS_MIN_MATCH_LEN;
-                       offset_high_bit = fls32(offset);
+                       log2_offset = fls32(offset);
                        len_hdr = min(0xF, adjusted_len);
-                       sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+                       sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
 
                        xpress_write_bits(os, codewords[sym], lens[sym]);
                        xpress_write_extra_length_bytes(os, adjusted_len);
-                       xpress_write_bits(os, offset - (1U << offset_high_bit),
-                                         offset_high_bit);
+                       xpress_write_bits(os, offset - (1U << log2_offset),
+                                         log2_offset);
                }
-               cur_optimum_ptr += length;
-       } while (cur_optimum_ptr != end_optimum_ptr);
+               cur_node += length;
+       } while (cur_node != end_node);
 }
 #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
 
@@ -503,16 +501,16 @@ xpress_record_match(struct xpress_compressor *c, unsigned length, unsigned offse
 {
        unsigned adjusted_len = length - XPRESS_MIN_MATCH_LEN;
        unsigned len_hdr = min(adjusted_len, 0xF);
-       unsigned offset_high_bit = fls32(offset);
-       unsigned sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+       unsigned log2_offset = fls32(offset);
+       unsigned sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
 
        c->freqs[sym]++;
 
        return (struct xpress_item) {
                .data = (u64)sym |
                        ((u64)adjusted_len << 9) |
-                       ((u64)offset_high_bit << 25) |
-                       ((u64)(offset ^ (1U << offset_high_bit)) << 29),
+                       ((u64)log2_offset << 25) |
+                       ((u64)(offset ^ (1U << log2_offset)) << 29),
        };
 }
 
@@ -531,6 +529,7 @@ xpress_compress_greedy(struct xpress_compressor * restrict c,
        const u8 * const in_end = in_begin + in_nbytes;
        struct xpress_item *next_chosen_item = c->chosen_items;
        unsigned len_3_too_far;
+       u32 next_hashes[2] = {};
 
        if (in_nbytes <= 8192)
                len_3_too_far = 2048;
@@ -545,11 +544,12 @@ xpress_compress_greedy(struct xpress_compressor * restrict c,
 
                length = hc_matchfinder_longest_match(&c->hc_mf,
                                                      in_begin,
-                                                     in_next,
+                                                     in_next - in_begin,
                                                      XPRESS_MIN_MATCH_LEN - 1,
                                                      in_end - in_next,
                                                      min(in_end - in_next, c->nice_match_length),
                                                      c->max_search_depth,
+                                                     next_hashes,
                                                      &offset);
                if (length >= XPRESS_MIN_MATCH_LEN &&
                    !(length == XPRESS_MIN_MATCH_LEN && offset >= len_3_too_far))
@@ -560,9 +560,10 @@ xpress_compress_greedy(struct xpress_compressor * restrict c,
                        in_next += 1;
                        hc_matchfinder_skip_positions(&c->hc_mf,
                                                      in_begin,
-                                                     in_next,
-                                                     in_end,
-                                                     length - 1);
+                                                     in_next - in_begin,
+                                                     in_end - in_begin,
+                                                     length - 1,
+                                                     next_hashes);
                        in_next += length - 1;
                } else {
                        /* No match found  */
@@ -591,6 +592,7 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
        const u8 * const in_end = in_begin + in_nbytes;
        struct xpress_item *next_chosen_item = c->chosen_items;
        unsigned len_3_too_far;
+       u32 next_hashes[2] = {};
 
        if (in_nbytes <= 8192)
                len_3_too_far = 2048;
@@ -608,11 +610,12 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                /* Find the longest match at the current position.  */
                cur_len = hc_matchfinder_longest_match(&c->hc_mf,
                                                       in_begin,
-                                                      in_next,
+                                                      in_next - in_begin,
                                                       XPRESS_MIN_MATCH_LEN - 1,
                                                       in_end - in_next,
                                                       min(in_end - in_next, c->nice_match_length),
                                                       c->max_search_depth,
+                                                      next_hashes,
                                                       &cur_offset);
                in_next += 1;
 
@@ -637,9 +640,10 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
 
                        hc_matchfinder_skip_positions(&c->hc_mf,
                                                      in_begin,
-                                                     in_next,
-                                                     in_end,
-                                                     cur_len - 1);
+                                                     in_next - in_begin,
+                                                     in_end - in_begin,
+                                                     cur_len - 1,
+                                                     next_hashes);
                        in_next += cur_len - 1;
                        continue;
                }
@@ -662,11 +666,12 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                 */
                next_len = hc_matchfinder_longest_match(&c->hc_mf,
                                                        in_begin,
-                                                       in_next,
+                                                       in_next - in_begin,
                                                        cur_len,
                                                        in_end - in_next,
                                                        min(in_end - in_next, c->nice_match_length),
                                                        c->max_search_depth / 2,
+                                                       next_hashes,
                                                        &next_offset);
                in_next += 1;
 
@@ -685,9 +690,10 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                                xpress_record_match(c, cur_len, cur_offset);
                        hc_matchfinder_skip_positions(&c->hc_mf,
                                                      in_begin,
-                                                     in_next,
-                                                     in_end,
-                                                     cur_len - 2);
+                                                     in_next - in_begin,
+                                                     in_end - in_begin,
+                                                     cur_len - 2,
+                                                     next_hashes);
                        in_next += cur_len - 2;
                        continue;
                }
@@ -728,13 +734,13 @@ xpress_update_costs(struct xpress_compressor *c)
  */
 static void
 xpress_tally_item_list(struct xpress_compressor *c,
-                      struct xpress_optimum_node *end_optimum_ptr)
+                      struct xpress_optimum_node *end_node)
 {
-       struct xpress_optimum_node *cur_optimum_ptr = c->optimum_nodes;
+       struct xpress_optimum_node *cur_node = c->optimum_nodes;
 
        do {
-               unsigned length = cur_optimum_ptr->item & OPTIMUM_LEN_MASK;
-               unsigned offset = cur_optimum_ptr->item >> OPTIMUM_OFFSET_SHIFT;
+               unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+               unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
 
                if (length == 1) {
                        /* Literal  */
@@ -744,19 +750,19 @@ xpress_tally_item_list(struct xpress_compressor *c,
                } else {
                        /* Match  */
                        unsigned adjusted_len;
-                       unsigned offset_high_bit;
+                       unsigned log2_offset;
                        unsigned len_hdr;
                        unsigned sym;
 
                        adjusted_len = length - XPRESS_MIN_MATCH_LEN;
-                       offset_high_bit = fls32(offset);
+                       log2_offset = fls32(offset);
                        len_hdr = min(0xF, adjusted_len);
-                       sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+                       sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
 
                        c->freqs[sym]++;
                }
-               cur_optimum_ptr += length;
-       } while (cur_optimum_ptr != end_optimum_ptr);
+               cur_node += length;
+       } while (cur_node != end_node);
 }
 
 /*
@@ -775,10 +781,10 @@ static void
 xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                          struct lz_match *end_cache_ptr)
 {
-       struct xpress_optimum_node *cur_optimum_ptr = c->optimum_nodes + in_nbytes;
+       struct xpress_optimum_node *cur_node = c->optimum_nodes + in_nbytes;
        struct lz_match *cache_ptr = end_cache_ptr;
 
-       cur_optimum_ptr->cost_to_end = 0;
+       cur_node->cost_to_end = 0;
        do {
                unsigned literal;
                u32 best_item;
@@ -787,7 +793,7 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                struct lz_match *match;
                unsigned len;
 
-               cur_optimum_ptr--;
+               cur_node--;
                cache_ptr--;
 
                literal = cache_ptr->offset;
@@ -795,14 +801,14 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                /* Consider coding a literal.  */
                best_item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
                best_cost_to_end = c->costs[literal] +
-                                  (cur_optimum_ptr + 1)->cost_to_end;
+                                  (cur_node + 1)->cost_to_end;
 
                num_matches = cache_ptr->length;
 
                if (num_matches == 0) {
                        /* No matches; the only choice is the literal.  */
-                       cur_optimum_ptr->cost_to_end = best_cost_to_end;
-                       cur_optimum_ptr->item = best_item;
+                       cur_node->cost_to_end = best_cost_to_end;
+                       cur_node->item = best_item;
                        continue;
                }
 
@@ -821,12 +827,12 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                        /* All lengths are small.  Optimize accordingly.  */
                        do {
                                unsigned offset;
-                               unsigned offset_high_bit;
+                               unsigned log2_offset;
                                u32 offset_cost;
 
                                offset = match->offset;
-                               offset_high_bit = fls32(offset);
-                               offset_cost = offset_high_bit;
+                               log2_offset = fls32(offset);
+                               offset_cost = log2_offset;
                                do {
                                        unsigned len_hdr;
                                        unsigned sym;
@@ -834,10 +840,10 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
 
                                        len_hdr = len - XPRESS_MIN_MATCH_LEN;
                                        sym = XPRESS_NUM_CHARS +
-                                             ((offset_high_bit << 4) | len_hdr);
+                                             ((log2_offset << 4) | len_hdr);
                                        cost_to_end =
                                                offset_cost + c->costs[sym] +
-                                               (cur_optimum_ptr + len)->cost_to_end;
+                                               (cur_node + len)->cost_to_end;
                                        if (cost_to_end < best_cost_to_end) {
                                                best_cost_to_end = cost_to_end;
                                                best_item =
@@ -850,12 +856,12 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                        /* Some lengths are big.  */
                        do {
                                unsigned offset;
-                               unsigned offset_high_bit;
+                               unsigned log2_offset;
                                u32 offset_cost;
 
                                offset = match->offset;
-                               offset_high_bit = fls32(offset);
-                               offset_cost = offset_high_bit;
+                               log2_offset = fls32(offset);
+                               offset_cost = log2_offset;
                                do {
                                        unsigned adjusted_len;
                                        unsigned len_hdr;
@@ -865,10 +871,10 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                                        adjusted_len = len - XPRESS_MIN_MATCH_LEN;
                                        len_hdr = min(adjusted_len, 0xF);
                                        sym = XPRESS_NUM_CHARS +
-                                             ((offset_high_bit << 4) | len_hdr);
+                                             ((log2_offset << 4) | len_hdr);
                                        cost_to_end =
                                                offset_cost + c->costs[sym] +
-                                               (cur_optimum_ptr + len)->cost_to_end;
+                                               (cur_node + len)->cost_to_end;
                                        if (adjusted_len >= 0xF) {
                                                cost_to_end += 8;
                                                if (adjusted_len - 0xF >= 0xFF)
@@ -884,9 +890,9 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                        } while (++match != cache_ptr);
                }
                cache_ptr -= num_matches;
-               cur_optimum_ptr->cost_to_end = best_cost_to_end;
-               cur_optimum_ptr->item = best_item;
-       } while (cur_optimum_ptr != c->optimum_nodes);
+               cur_node->cost_to_end = best_cost_to_end;
+               cur_node->item = best_item;
+       } while (cur_node != c->optimum_nodes);
 }
 
 /*
@@ -900,28 +906,22 @@ xpress_find_matches(struct xpress_compressor * restrict c,
 {
        const u8 * const in_begin = in;
        const u8 *in_next = in_begin;
-       const u8 * const in_end = in_begin + in_nbytes;
        struct lz_match *cache_ptr = c->match_cache;
-       u32 next_hash;
+       u32 next_hashes[2] = {};
+       u32 max_len = in_nbytes;
+       u32 nice_len = min(max_len, c->nice_match_length);
 
        bt_matchfinder_init(&c->bt_mf);
-       next_hash = bt_matchfinder_hash_3_bytes(in_next);
 
-       do {
+       for (;;) {
                struct lz_match *matches;
-               unsigned best_len;
+               u32 best_len;
 
                /* If we've found so many matches that the cache might overflow
                 * if we keep finding more, then stop finding matches.  This
                 * case is very unlikely.  */
-               if (unlikely(cache_ptr >= c->cache_overflow_mark)) {
-                       do {
-                               cache_ptr->length = 0;
-                               cache_ptr->offset = *in_next++;
-                               cache_ptr++;
-                       } while (in_next != in_end);
-                       return cache_ptr;
-               }
+               if (unlikely(cache_ptr >= c->cache_overflow_mark || max_len < 5))
+                       break;
 
                matches = cache_ptr;
 
@@ -931,18 +931,18 @@ xpress_find_matches(struct xpress_compressor * restrict c,
                cache_ptr =
                        bt_matchfinder_get_matches(&c->bt_mf,
                                                   in_begin,
-                                                  in_next,
-                                                  XPRESS_MIN_MATCH_LEN,
-                                                  in_end - in_next,
-                                                  min(in_end - in_next, c->nice_match_length),
+                                                  in_next - in_begin,
+                                                  max_len,
+                                                  nice_len,
                                                   c->max_search_depth,
-                                                  &next_hash,
+                                                  next_hashes,
                                                   &best_len,
                                                   cache_ptr);
                cache_ptr->length = cache_ptr - matches;
-               cache_ptr->offset = *in_next;
-               in_next++;
+               cache_ptr->offset = *in_next++;
                cache_ptr++;
+               max_len--;
+               nice_len = min(nice_len, max_len);
 
                /*
                 * If there was a very long match found, then don't cache any
@@ -954,24 +954,32 @@ xpress_find_matches(struct xpress_compressor * restrict c,
                 * very much.  If there's a long match, then the data must be
                 * highly compressible, so it doesn't matter as much what we do.
                 */
-               if (best_len >= c->nice_match_length) {
+               if (best_len >= nice_len) {
+                       if (unlikely(best_len + 5 >= max_len))
+                               break;
                        --best_len;
                        do {
                                bt_matchfinder_skip_position(&c->bt_mf,
                                                             in_begin,
-                                                            in_next,
-                                                            in_end,
-                                                            min(in_end - in_next,
-                                                                c->nice_match_length),
+                                                            in_next - in_begin,
+                                                            max_len,
+                                                            nice_len,
                                                             c->max_search_depth,
-                                                            &next_hash);
-
+                                                            next_hashes);
                                cache_ptr->length = 0;
                                cache_ptr->offset = *in_next++;
                                cache_ptr++;
+                               max_len--;
+                               nice_len = min(nice_len, max_len);
                        } while (--best_len);
                }
-       } while (in_next != in_end);
+       }
+
+       while (max_len--) {
+               cache_ptr->length = 0;
+               cache_ptr->offset = *in_next++;
+               cache_ptr++;
+       }
 
        return cache_ptr;
 }
@@ -1031,7 +1039,8 @@ xpress_get_compressor_size(size_t max_bufsize, unsigned compression_level)
 }
 
 static u64
-xpress_get_needed_memory(size_t max_bufsize, unsigned compression_level)
+xpress_get_needed_memory(size_t max_bufsize, unsigned compression_level,
+                        bool destructive)
 {
        u64 size = 0;
 
@@ -1060,15 +1069,14 @@ xpress_get_needed_memory(size_t max_bufsize, unsigned compression_level)
 
 static int
 xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
-                        void **c_ret)
+                        bool destructive, void **c_ret)
 {
        struct xpress_compressor *c;
 
        if (max_bufsize > XPRESS_MAX_BUFSIZE)
                return WIMLIB_ERR_INVALID_PARAM;
 
-       c = ALIGNED_MALLOC(xpress_get_compressor_size(max_bufsize, compression_level),
-                          MATCHFINDER_ALIGNMENT);
+       c = MALLOC(xpress_get_compressor_size(max_bufsize, compression_level));
        if (!c)
                goto oom0;
 
@@ -1088,6 +1096,13 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
                        c->impl = xpress_compress_lazy;
                        c->max_search_depth = (compression_level * 24) / 32;
                        c->nice_match_length = (compression_level * 48) / 32;
+
+                       /* xpress_compress_lazy() needs max_search_depth >= 2
+                        * because it halves the max_search_depth when
+                        * attempting a lazy match, and max_search_depth cannot
+                        * be 0.  */
+                       if (c->max_search_depth < 2)
+                               c->max_search_depth = 2;
                }
        }
 #if SUPPORT_NEAR_OPTIMAL_PARSING
@@ -1113,18 +1128,22 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
        }
 #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
 
+       /* max_search_depth == 0 is invalid.  */
+       if (c->max_search_depth < 1)
+               c->max_search_depth = 1;
+
        *c_ret = c;
        return 0;
 
 oom1:
-       ALIGNED_FREE(c);
+       FREE(c);
 oom0:
        return WIMLIB_ERR_NOMEM;
 }
 
 static size_t
-xpress_compress(const void *in, size_t in_nbytes,
-               void *out, size_t out_nbytes_avail, void *_c)
+xpress_compress(const void *restrict in, size_t in_nbytes,
+               void *restrict out, size_t out_nbytes_avail, void *restrict _c)
 {
        struct xpress_compressor *c = _c;
 
@@ -1152,7 +1171,7 @@ xpress_free_compressor(void *_c)
        } else
 #endif
                FREE(c->chosen_items);
-       ALIGNED_FREE(c);
+       FREE(c);
 }
 
 const struct compressor_ops xpress_compressor_ops = {