xpress_compress.c: adjust parameters for slightly stronger compression

[wimlib] / src / xpress_compress.c
diff --git a/src/xpress_compress.c b/src/xpress_compress.c

index 5298af64ddb342e8598ee090cef50d48393738c8..5c3249d05ec79eb61447cd2d0e15b078005e5841 100644 (file)
--- a/src/xpress_compress.c
+++ b/src/xpress_compress.c
@@ -46,10 +46,10 @@
  #define MIN_LEVEL_FOR_NEAR_OPTIMAL     60
  
  /*
- * The maximum window order for the matchfinder.  This must be the base 2
- * logarithm of the maximum buffer size.
+ * Matchfinder definitions.  For XPRESS, only a 16-bit matchfinder is needed.
   */
-#define MATCHFINDER_MAX_WINDOW_ORDER   16
+#define mf_pos_t       u16
+#define MF_SUFFIX
  
  /*
   * Note: although XPRESS can potentially use a sliding window, it isn't well
@@ -59,8 +59,6 @@
   * optimizations.
   */
  
-#include <string.h>
-
  #include "wimlib/bitops.h"
  #include "wimlib/compress_common.h"
  #include "wimlib/compressor_ops.h"
@@ -396,11 +394,11 @@ xpress_write_item_list(struct xpress_output_bitstream *os,
                        struct xpress_optimum_node *optimum_nodes,
                        size_t count, const u32 codewords[], const u8 lens[])
  {
-       struct xpress_optimum_node *cur_optimum_ptr = optimum_nodes;
-       struct xpress_optimum_node *end_optimum_ptr = optimum_nodes + count;
+       struct xpress_optimum_node *cur_node = optimum_nodes;
+       struct xpress_optimum_node *end_node = optimum_nodes + count;
         do {
-               unsigned length = cur_optimum_ptr->item & OPTIMUM_LEN_MASK;
-               unsigned offset = cur_optimum_ptr->item >> OPTIMUM_OFFSET_SHIFT;
+               unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+               unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
  
                 if (length == 1) {
                         /* Literal  */
@@ -410,22 +408,22 @@ xpress_write_item_list(struct xpress_output_bitstream *os,
                 } else {
                         /* Match  */
                         unsigned adjusted_len;
-                       unsigned offset_high_bit;
+                       unsigned log2_offset;
                         unsigned len_hdr;
                         unsigned sym;
  
                         adjusted_len = length - XPRESS_MIN_MATCH_LEN;
-                       offset_high_bit = fls32(offset);
+                       log2_offset = fls32(offset);
                         len_hdr = min(0xF, adjusted_len);
-                       sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+                       sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
  
                         xpress_write_bits(os, codewords[sym], lens[sym]);
                         xpress_write_extra_length_bytes(os, adjusted_len);
-                       xpress_write_bits(os, offset - (1U << offset_high_bit),
-                                         offset_high_bit);
+                       xpress_write_bits(os, offset - (1U << log2_offset),
+                                         log2_offset);
                 }
-               cur_optimum_ptr += length;
-       } while (cur_optimum_ptr != end_optimum_ptr);
+               cur_node += length;
+       } while (cur_node != end_node);
  }
  #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
  
@@ -503,16 +501,16 @@ xpress_record_match(struct xpress_compressor *c, unsigned length, unsigned offse
  {
         unsigned adjusted_len = length - XPRESS_MIN_MATCH_LEN;
         unsigned len_hdr = min(adjusted_len, 0xF);
-       unsigned offset_high_bit = fls32(offset);
-       unsigned sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+       unsigned log2_offset = fls32(offset);
+       unsigned sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
  
         c->freqs[sym]++;
  
         return (struct xpress_item) {
                 .data = (u64)sym |
                         ((u64)adjusted_len << 9) |
-                       ((u64)offset_high_bit << 25) |
-                       ((u64)(offset ^ (1U << offset_high_bit)) << 29),
+                       ((u64)log2_offset << 25) |
+                       ((u64)(offset ^ (1U << log2_offset)) << 29),
         };
  }
  
@@ -736,13 +734,13 @@ xpress_update_costs(struct xpress_compressor *c)
   */
  static void
  xpress_tally_item_list(struct xpress_compressor *c,
-                      struct xpress_optimum_node *end_optimum_ptr)
+                      struct xpress_optimum_node *end_node)
  {
-       struct xpress_optimum_node *cur_optimum_ptr = c->optimum_nodes;
+       struct xpress_optimum_node *cur_node = c->optimum_nodes;
  
         do {
-               unsigned length = cur_optimum_ptr->item & OPTIMUM_LEN_MASK;
-               unsigned offset = cur_optimum_ptr->item >> OPTIMUM_OFFSET_SHIFT;
+               unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+               unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
  
                 if (length == 1) {
                         /* Literal  */
@@ -752,19 +750,19 @@ xpress_tally_item_list(struct xpress_compressor *c,
                 } else {
                         /* Match  */
                         unsigned adjusted_len;
-                       unsigned offset_high_bit;
+                       unsigned log2_offset;
                         unsigned len_hdr;
                         unsigned sym;
  
                         adjusted_len = length - XPRESS_MIN_MATCH_LEN;
-                       offset_high_bit = fls32(offset);
+                       log2_offset = fls32(offset);
                         len_hdr = min(0xF, adjusted_len);
-                       sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+                       sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
  
                         c->freqs[sym]++;
                 }
-               cur_optimum_ptr += length;
-       } while (cur_optimum_ptr != end_optimum_ptr);
+               cur_node += length;
+       } while (cur_node != end_node);
  }
  
  /*
@@ -783,10 +781,10 @@ static void
  xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                           struct lz_match *end_cache_ptr)
  {
-       struct xpress_optimum_node *cur_optimum_ptr = c->optimum_nodes + in_nbytes;
+       struct xpress_optimum_node *cur_node = c->optimum_nodes + in_nbytes;
         struct lz_match *cache_ptr = end_cache_ptr;
  
-       cur_optimum_ptr->cost_to_end = 0;
+       cur_node->cost_to_end = 0;
         do {
                 unsigned literal;
                 u32 best_item;
@@ -795,7 +793,7 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                 struct lz_match *match;
                 unsigned len;
  
-               cur_optimum_ptr--;
+               cur_node--;
                 cache_ptr--;
  
                 literal = cache_ptr->offset;
@@ -803,14 +801,14 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                 /* Consider coding a literal.  */
                 best_item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
                 best_cost_to_end = c->costs[literal] +
-                                  (cur_optimum_ptr + 1)->cost_to_end;
+                                  (cur_node + 1)->cost_to_end;
  
                 num_matches = cache_ptr->length;
  
                 if (num_matches == 0) {
                         /* No matches; the only choice is the literal.  */
-                       cur_optimum_ptr->cost_to_end = best_cost_to_end;
-                       cur_optimum_ptr->item = best_item;
+                       cur_node->cost_to_end = best_cost_to_end;
+                       cur_node->item = best_item;
                         continue;
                 }
  
@@ -829,12 +827,12 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                         /* All lengths are small.  Optimize accordingly.  */
                         do {
                                 unsigned offset;
-                               unsigned offset_high_bit;
+                               unsigned log2_offset;
                                 u32 offset_cost;
  
                                 offset = match->offset;
-                               offset_high_bit = fls32(offset);
-                               offset_cost = offset_high_bit;
+                               log2_offset = fls32(offset);
+                               offset_cost = log2_offset;
                                 do {
                                         unsigned len_hdr;
                                         unsigned sym;
@@ -842,10 +840,10 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
  
                                         len_hdr = len - XPRESS_MIN_MATCH_LEN;
                                         sym = XPRESS_NUM_CHARS +
-                                             ((offset_high_bit << 4) | len_hdr);
+                                             ((log2_offset << 4) | len_hdr);
                                         cost_to_end =
                                                 offset_cost + c->costs[sym] +
-                                               (cur_optimum_ptr + len)->cost_to_end;
+                                               (cur_node + len)->cost_to_end;
                                         if (cost_to_end < best_cost_to_end) {
                                                 best_cost_to_end = cost_to_end;
                                                 best_item =
@@ -858,12 +856,12 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                         /* Some lengths are big.  */
                         do {
                                 unsigned offset;
-                               unsigned offset_high_bit;
+                               unsigned log2_offset;
                                 u32 offset_cost;
  
                                 offset = match->offset;
-                               offset_high_bit = fls32(offset);
-                               offset_cost = offset_high_bit;
+                               log2_offset = fls32(offset);
+                               offset_cost = log2_offset;
                                 do {
                                         unsigned adjusted_len;
                                         unsigned len_hdr;
@@ -873,10 +871,10 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                                         adjusted_len = len - XPRESS_MIN_MATCH_LEN;
                                         len_hdr = min(adjusted_len, 0xF);
                                         sym = XPRESS_NUM_CHARS +
-                                             ((offset_high_bit << 4) | len_hdr);
+                                             ((log2_offset << 4) | len_hdr);
                                         cost_to_end =
                                                 offset_cost + c->costs[sym] +
-                                               (cur_optimum_ptr + len)->cost_to_end;
+                                               (cur_node + len)->cost_to_end;
                                         if (adjusted_len >= 0xF) {
                                                 cost_to_end += 8;
                                                 if (adjusted_len - 0xF >= 0xFF)
@@ -892,9 +890,9 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                         } while (++match != cache_ptr);
                 }
                 cache_ptr -= num_matches;
-               cur_optimum_ptr->cost_to_end = best_cost_to_end;
-               cur_optimum_ptr->item = best_item;
-       } while (cur_optimum_ptr != c->optimum_nodes);
+               cur_node->cost_to_end = best_cost_to_end;
+               cur_node->item = best_item;
+       } while (cur_node != c->optimum_nodes);
  }
  
  /*
@@ -908,28 +906,22 @@ xpress_find_matches(struct xpress_compressor * restrict c,
  {
         const u8 * const in_begin = in;
         const u8 *in_next = in_begin;
-       const u8 * const in_end = in_begin + in_nbytes;
         struct lz_match *cache_ptr = c->match_cache;
-       u32 next_hash;
+       u32 next_hashes[2] = {};
+       u32 max_len = in_nbytes;
+       u32 nice_len = min(max_len, c->nice_match_length);
  
         bt_matchfinder_init(&c->bt_mf);
-       next_hash = bt_matchfinder_hash_3_bytes(in_next);
  
-       do {
+       for (;;) {
                 struct lz_match *matches;
-               unsigned best_len;
+               u32 best_len;
  
                 /* If we've found so many matches that the cache might overflow
                  * if we keep finding more, then stop finding matches.  This
                  * case is very unlikely.  */
-               if (unlikely(cache_ptr >= c->cache_overflow_mark)) {
-                       do {
-                               cache_ptr->length = 0;
-                               cache_ptr->offset = *in_next++;
-                               cache_ptr++;
-                       } while (in_next != in_end);
-                       return cache_ptr;
-               }
+               if (unlikely(cache_ptr >= c->cache_overflow_mark || max_len < 5))
+                       break;
  
                 matches = cache_ptr;
  
@@ -939,18 +931,18 @@ xpress_find_matches(struct xpress_compressor * restrict c,
                 cache_ptr =
                         bt_matchfinder_get_matches(&c->bt_mf,
                                                    in_begin,
-                                                  in_next,
-                                                  XPRESS_MIN_MATCH_LEN,
-                                                  in_end - in_next,
-                                                  min(in_end - in_next, c->nice_match_length),
+                                                  in_next - in_begin,
+                                                  max_len,
+                                                  nice_len,
                                                    c->max_search_depth,
-                                                  &next_hash,
+                                                  next_hashes,
                                                    &best_len,
                                                    cache_ptr);
                 cache_ptr->length = cache_ptr - matches;
-               cache_ptr->offset = *in_next;
-               in_next++;
+               cache_ptr->offset = *in_next++;
                 cache_ptr++;
+               max_len--;
+               nice_len = min(nice_len, max_len);
  
                 /*
                  * If there was a very long match found, then don't cache any
@@ -962,24 +954,32 @@ xpress_find_matches(struct xpress_compressor * restrict c,
                  * very much.  If there's a long match, then the data must be
                  * highly compressible, so it doesn't matter as much what we do.
                  */
-               if (best_len >= c->nice_match_length) {
+               if (best_len >= nice_len) {
+                       if (unlikely(best_len + 5 >= max_len))
+                               break;
                         --best_len;
                         do {
                                 bt_matchfinder_skip_position(&c->bt_mf,
                                                              in_begin,
-                                                            in_next,
-                                                            in_end,
-                                                            min(in_end - in_next,
-                                                                c->nice_match_length),
+                                                            in_next - in_begin,
+                                                            max_len,
+                                                            nice_len,
                                                              c->max_search_depth,
-                                                            &next_hash);
-
+                                                            next_hashes);
                                 cache_ptr->length = 0;
                                 cache_ptr->offset = *in_next++;
                                 cache_ptr++;
+                               max_len--;
+                               nice_len = min(nice_len, max_len);
                         } while (--best_len);
                 }
-       } while (in_next != in_end);
+       }
+
+       while (max_len--) {
+               cache_ptr->length = 0;
+               cache_ptr->offset = *in_next++;
+               cache_ptr++;
+       }
  
         return cache_ptr;
  }
@@ -1090,12 +1090,12 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
  
                 if (compression_level < 30) {
                         c->impl = xpress_compress_greedy;
-                       c->max_search_depth = (compression_level * 24) / 16;
-                       c->nice_match_length = (compression_level * 48) / 16;
+                       c->max_search_depth = (compression_level * 30) / 16;
+                       c->nice_match_length = (compression_level * 60) / 16;
                 } else {
                         c->impl = xpress_compress_lazy;
-                       c->max_search_depth = (compression_level * 24) / 32;
-                       c->nice_match_length = (compression_level * 48) / 32;
+                       c->max_search_depth = (compression_level * 30) / 32;
+                       c->nice_match_length = (compression_level * 60) / 32;
  
                         /* xpress_compress_lazy() needs max_search_depth >= 2
                          * because it halves the max_search_depth when
@@ -1122,8 +1122,8 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
                         &c->match_cache[max_bufsize * CACHE_RESERVE_PER_POS];
  
                 c->impl = xpress_compress_near_optimal;
-               c->max_search_depth = (compression_level * 32) / 100;
-               c->nice_match_length = (compression_level * 50) / 100;
+               c->max_search_depth = (compression_level * 28) / 100;
+               c->nice_match_length = (compression_level * 56) / 100;
                 c->num_optim_passes = compression_level / 40;
         }
  #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */