]> wimlib.net Git - wimlib/blobdiff - src/xpress_compress.c
bt_matchfinder: remove unnecessary max_len parameter to skip routine
[wimlib] / src / xpress_compress.c
index cf29df99268d2801e97cdab246c44392d1bfdea9..d8cb7697deb9732eeab7a4e33c1d6093abb618fc 100644 (file)
 #define MIN_LEVEL_FOR_NEAR_OPTIMAL     60
 
 /*
- * The maximum window order for the matchfinder.  This must be the base 2
- * logarithm of the maximum buffer size.
+ * Matchfinder definitions.  For XPRESS, only a 16-bit matchfinder is needed.
  */
-#define MATCHFINDER_MAX_WINDOW_ORDER   16
+#define mf_pos_t       u16
+#define MF_SUFFIX
 
 /*
  * Note: although XPRESS can potentially use a sliding window, it isn't well
@@ -59,8 +59,6 @@
  * optimizations.
  */
 
-#include <string.h>
-
 #include "wimlib/bitops.h"
 #include "wimlib/compress_common.h"
 #include "wimlib/compressor_ops.h"
@@ -214,7 +212,7 @@ struct xpress_output_bitstream {
        /* Pointer to the start of the output buffer.  */
        u8 *start;
 
-       /* Pointer to the location in the ouput buffer at which to write the
+       /* Pointer to the location in the output buffer at which to write the
         * next 16 bits.  */
        u8 *next_bits;
 
@@ -294,7 +292,7 @@ xpress_write_bits(struct xpress_output_bitstream *os,
        if (os->bitcount > 16) {
                os->bitcount -= 16;
                if (os->end - os->next_byte >= 2) {
-                       put_unaligned_u16_le(os->bitbuf >> os->bitcount, os->next_bits);
+                       put_unaligned_le16(os->bitbuf >> os->bitcount, os->next_bits);
                        os->next_bits = os->next_bits2;
                        os->next_bits2 = os->next_byte;
                        os->next_byte += 2;
@@ -319,7 +317,7 @@ static inline void
 xpress_write_u16(struct xpress_output_bitstream *os, u16 v)
 {
        if (os->end - os->next_byte >= 2) {
-               put_unaligned_u16_le(v, os->next_byte);
+               put_unaligned_le16(v, os->next_byte);
                os->next_byte += 2;
        }
 }
@@ -334,8 +332,8 @@ xpress_flush_output(struct xpress_output_bitstream *os)
        if (os->end - os->next_byte < 2)
                return 0;
 
-       put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), os->next_bits);
-       put_unaligned_u16_le(0, os->next_bits2);
+       put_unaligned_le16(os->bitbuf << (16 - os->bitcount), os->next_bits);
+       put_unaligned_le16(0, os->next_bits2);
 
        return os->next_byte - os->start;
 }
@@ -396,11 +394,11 @@ xpress_write_item_list(struct xpress_output_bitstream *os,
                       struct xpress_optimum_node *optimum_nodes,
                       size_t count, const u32 codewords[], const u8 lens[])
 {
-       struct xpress_optimum_node *cur_optimum_ptr = optimum_nodes;
-       struct xpress_optimum_node *end_optimum_ptr = optimum_nodes + count;
+       struct xpress_optimum_node *cur_node = optimum_nodes;
+       struct xpress_optimum_node *end_node = optimum_nodes + count;
        do {
-               unsigned length = cur_optimum_ptr->item & OPTIMUM_LEN_MASK;
-               unsigned offset = cur_optimum_ptr->item >> OPTIMUM_OFFSET_SHIFT;
+               unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+               unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
 
                if (length == 1) {
                        /* Literal  */
@@ -410,22 +408,22 @@ xpress_write_item_list(struct xpress_output_bitstream *os,
                } else {
                        /* Match  */
                        unsigned adjusted_len;
-                       unsigned offset_high_bit;
+                       unsigned log2_offset;
                        unsigned len_hdr;
                        unsigned sym;
 
                        adjusted_len = length - XPRESS_MIN_MATCH_LEN;
-                       offset_high_bit = fls32(offset);
+                       log2_offset = fls32(offset);
                        len_hdr = min(0xF, adjusted_len);
-                       sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+                       sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
 
                        xpress_write_bits(os, codewords[sym], lens[sym]);
                        xpress_write_extra_length_bytes(os, adjusted_len);
-                       xpress_write_bits(os, offset - (1U << offset_high_bit),
-                                         offset_high_bit);
+                       xpress_write_bits(os, offset - (1U << log2_offset),
+                                         log2_offset);
                }
-               cur_optimum_ptr += length;
-       } while (cur_optimum_ptr != end_optimum_ptr);
+               cur_node += length;
+       } while (cur_node != end_node);
 }
 #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
 
@@ -503,16 +501,16 @@ xpress_record_match(struct xpress_compressor *c, unsigned length, unsigned offse
 {
        unsigned adjusted_len = length - XPRESS_MIN_MATCH_LEN;
        unsigned len_hdr = min(adjusted_len, 0xF);
-       unsigned offset_high_bit = fls32(offset);
-       unsigned sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+       unsigned log2_offset = fls32(offset);
+       unsigned sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
 
        c->freqs[sym]++;
 
        return (struct xpress_item) {
                .data = (u64)sym |
                        ((u64)adjusted_len << 9) |
-                       ((u64)offset_high_bit << 25) |
-                       ((u64)(offset ^ (1U << offset_high_bit)) << 29),
+                       ((u64)log2_offset << 25) |
+                       ((u64)(offset ^ (1U << log2_offset)) << 29),
        };
 }
 
@@ -531,6 +529,7 @@ xpress_compress_greedy(struct xpress_compressor * restrict c,
        const u8 * const in_end = in_begin + in_nbytes;
        struct xpress_item *next_chosen_item = c->chosen_items;
        unsigned len_3_too_far;
+       u32 next_hashes[2] = {};
 
        if (in_nbytes <= 8192)
                len_3_too_far = 2048;
@@ -545,11 +544,12 @@ xpress_compress_greedy(struct xpress_compressor * restrict c,
 
                length = hc_matchfinder_longest_match(&c->hc_mf,
                                                      in_begin,
-                                                     in_next,
+                                                     in_next - in_begin,
                                                      XPRESS_MIN_MATCH_LEN - 1,
                                                      in_end - in_next,
                                                      min(in_end - in_next, c->nice_match_length),
                                                      c->max_search_depth,
+                                                     next_hashes,
                                                      &offset);
                if (length >= XPRESS_MIN_MATCH_LEN &&
                    !(length == XPRESS_MIN_MATCH_LEN && offset >= len_3_too_far))
@@ -560,9 +560,10 @@ xpress_compress_greedy(struct xpress_compressor * restrict c,
                        in_next += 1;
                        hc_matchfinder_skip_positions(&c->hc_mf,
                                                      in_begin,
-                                                     in_next,
-                                                     in_end,
-                                                     length - 1);
+                                                     in_next - in_begin,
+                                                     in_end - in_begin,
+                                                     length - 1,
+                                                     next_hashes);
                        in_next += length - 1;
                } else {
                        /* No match found  */
@@ -591,6 +592,7 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
        const u8 * const in_end = in_begin + in_nbytes;
        struct xpress_item *next_chosen_item = c->chosen_items;
        unsigned len_3_too_far;
+       u32 next_hashes[2] = {};
 
        if (in_nbytes <= 8192)
                len_3_too_far = 2048;
@@ -608,11 +610,12 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                /* Find the longest match at the current position.  */
                cur_len = hc_matchfinder_longest_match(&c->hc_mf,
                                                       in_begin,
-                                                      in_next,
+                                                      in_next - in_begin,
                                                       XPRESS_MIN_MATCH_LEN - 1,
                                                       in_end - in_next,
                                                       min(in_end - in_next, c->nice_match_length),
                                                       c->max_search_depth,
+                                                      next_hashes,
                                                       &cur_offset);
                in_next += 1;
 
@@ -637,9 +640,10 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
 
                        hc_matchfinder_skip_positions(&c->hc_mf,
                                                      in_begin,
-                                                     in_next,
-                                                     in_end,
-                                                     cur_len - 1);
+                                                     in_next - in_begin,
+                                                     in_end - in_begin,
+                                                     cur_len - 1,
+                                                     next_hashes);
                        in_next += cur_len - 1;
                        continue;
                }
@@ -662,11 +666,12 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                 */
                next_len = hc_matchfinder_longest_match(&c->hc_mf,
                                                        in_begin,
-                                                       in_next,
+                                                       in_next - in_begin,
                                                        cur_len,
                                                        in_end - in_next,
                                                        min(in_end - in_next, c->nice_match_length),
                                                        c->max_search_depth / 2,
+                                                       next_hashes,
                                                        &next_offset);
                in_next += 1;
 
@@ -685,9 +690,10 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                                xpress_record_match(c, cur_len, cur_offset);
                        hc_matchfinder_skip_positions(&c->hc_mf,
                                                      in_begin,
-                                                     in_next,
-                                                     in_end,
-                                                     cur_len - 2);
+                                                     in_next - in_begin,
+                                                     in_end - in_begin,
+                                                     cur_len - 2,
+                                                     next_hashes);
                        in_next += cur_len - 2;
                        continue;
                }
@@ -728,13 +734,13 @@ xpress_update_costs(struct xpress_compressor *c)
  */
 static void
 xpress_tally_item_list(struct xpress_compressor *c,
-                      struct xpress_optimum_node *end_optimum_ptr)
+                      struct xpress_optimum_node *end_node)
 {
-       struct xpress_optimum_node *cur_optimum_ptr = c->optimum_nodes;
+       struct xpress_optimum_node *cur_node = c->optimum_nodes;
 
        do {
-               unsigned length = cur_optimum_ptr->item & OPTIMUM_LEN_MASK;
-               unsigned offset = cur_optimum_ptr->item >> OPTIMUM_OFFSET_SHIFT;
+               unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+               unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
 
                if (length == 1) {
                        /* Literal  */
@@ -744,19 +750,19 @@ xpress_tally_item_list(struct xpress_compressor *c,
                } else {
                        /* Match  */
                        unsigned adjusted_len;
-                       unsigned offset_high_bit;
+                       unsigned log2_offset;
                        unsigned len_hdr;
                        unsigned sym;
 
                        adjusted_len = length - XPRESS_MIN_MATCH_LEN;
-                       offset_high_bit = fls32(offset);
+                       log2_offset = fls32(offset);
                        len_hdr = min(0xF, adjusted_len);
-                       sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+                       sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
 
                        c->freqs[sym]++;
                }
-               cur_optimum_ptr += length;
-       } while (cur_optimum_ptr != end_optimum_ptr);
+               cur_node += length;
+       } while (cur_node != end_node);
 }
 
 /*
@@ -775,10 +781,10 @@ static void
 xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                          struct lz_match *end_cache_ptr)
 {
-       struct xpress_optimum_node *cur_optimum_ptr = c->optimum_nodes + in_nbytes;
+       struct xpress_optimum_node *cur_node = c->optimum_nodes + in_nbytes;
        struct lz_match *cache_ptr = end_cache_ptr;
 
-       cur_optimum_ptr->cost_to_end = 0;
+       cur_node->cost_to_end = 0;
        do {
                unsigned literal;
                u32 best_item;
@@ -787,7 +793,7 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                struct lz_match *match;
                unsigned len;
 
-               cur_optimum_ptr--;
+               cur_node--;
                cache_ptr--;
 
                literal = cache_ptr->offset;
@@ -795,14 +801,14 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                /* Consider coding a literal.  */
                best_item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
                best_cost_to_end = c->costs[literal] +
-                                  (cur_optimum_ptr + 1)->cost_to_end;
+                                  (cur_node + 1)->cost_to_end;
 
                num_matches = cache_ptr->length;
 
                if (num_matches == 0) {
                        /* No matches; the only choice is the literal.  */
-                       cur_optimum_ptr->cost_to_end = best_cost_to_end;
-                       cur_optimum_ptr->item = best_item;
+                       cur_node->cost_to_end = best_cost_to_end;
+                       cur_node->item = best_item;
                        continue;
                }
 
@@ -821,12 +827,12 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                        /* All lengths are small.  Optimize accordingly.  */
                        do {
                                unsigned offset;
-                               unsigned offset_high_bit;
+                               unsigned log2_offset;
                                u32 offset_cost;
 
                                offset = match->offset;
-                               offset_high_bit = fls32(offset);
-                               offset_cost = offset_high_bit;
+                               log2_offset = fls32(offset);
+                               offset_cost = log2_offset;
                                do {
                                        unsigned len_hdr;
                                        unsigned sym;
@@ -834,10 +840,10 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
 
                                        len_hdr = len - XPRESS_MIN_MATCH_LEN;
                                        sym = XPRESS_NUM_CHARS +
-                                             ((offset_high_bit << 4) | len_hdr);
+                                             ((log2_offset << 4) | len_hdr);
                                        cost_to_end =
                                                offset_cost + c->costs[sym] +
-                                               (cur_optimum_ptr + len)->cost_to_end;
+                                               (cur_node + len)->cost_to_end;
                                        if (cost_to_end < best_cost_to_end) {
                                                best_cost_to_end = cost_to_end;
                                                best_item =
@@ -850,12 +856,12 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                        /* Some lengths are big.  */
                        do {
                                unsigned offset;
-                               unsigned offset_high_bit;
+                               unsigned log2_offset;
                                u32 offset_cost;
 
                                offset = match->offset;
-                               offset_high_bit = fls32(offset);
-                               offset_cost = offset_high_bit;
+                               log2_offset = fls32(offset);
+                               offset_cost = log2_offset;
                                do {
                                        unsigned adjusted_len;
                                        unsigned len_hdr;
@@ -865,10 +871,10 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                                        adjusted_len = len - XPRESS_MIN_MATCH_LEN;
                                        len_hdr = min(adjusted_len, 0xF);
                                        sym = XPRESS_NUM_CHARS +
-                                             ((offset_high_bit << 4) | len_hdr);
+                                             ((log2_offset << 4) | len_hdr);
                                        cost_to_end =
                                                offset_cost + c->costs[sym] +
-                                               (cur_optimum_ptr + len)->cost_to_end;
+                                               (cur_node + len)->cost_to_end;
                                        if (adjusted_len >= 0xF) {
                                                cost_to_end += 8;
                                                if (adjusted_len - 0xF >= 0xFF)
@@ -884,9 +890,9 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                        } while (++match != cache_ptr);
                }
                cache_ptr -= num_matches;
-               cur_optimum_ptr->cost_to_end = best_cost_to_end;
-               cur_optimum_ptr->item = best_item;
-       } while (cur_optimum_ptr != c->optimum_nodes);
+               cur_node->cost_to_end = best_cost_to_end;
+               cur_node->item = best_item;
+       } while (cur_node != c->optimum_nodes);
 }
 
 /*
@@ -900,28 +906,23 @@ xpress_find_matches(struct xpress_compressor * restrict c,
 {
        const u8 * const in_begin = in;
        const u8 *in_next = in_begin;
-       const u8 * const in_end = in_begin + in_nbytes;
        struct lz_match *cache_ptr = c->match_cache;
-       u32 next_hash;
+       u32 next_hashes[2] = {};
+       u32 max_len = in_nbytes;
+       u32 nice_len = min(max_len, c->nice_match_length);
 
        bt_matchfinder_init(&c->bt_mf);
-       next_hash = bt_matchfinder_hash_3_bytes(in_next);
 
-       do {
+       for (;;) {
                struct lz_match *matches;
-               unsigned best_len;
+               u32 best_len;
 
                /* If we've found so many matches that the cache might overflow
                 * if we keep finding more, then stop finding matches.  This
                 * case is very unlikely.  */
-               if (unlikely(cache_ptr >= c->cache_overflow_mark)) {
-                       do {
-                               cache_ptr->length = 0;
-                               cache_ptr->offset = *in_next++;
-                               cache_ptr++;
-                       } while (in_next != in_end);
-                       return cache_ptr;
-               }
+               if (unlikely(cache_ptr >= c->cache_overflow_mark ||
+                            max_len < BT_MATCHFINDER_REQUIRED_NBYTES))
+                       break;
 
                matches = cache_ptr;
 
@@ -931,18 +932,18 @@ xpress_find_matches(struct xpress_compressor * restrict c,
                cache_ptr =
                        bt_matchfinder_get_matches(&c->bt_mf,
                                                   in_begin,
-                                                  in_next,
-                                                  XPRESS_MIN_MATCH_LEN,
-                                                  in_end - in_next,
-                                                  min(in_end - in_next, c->nice_match_length),
+                                                  in_next - in_begin,
+                                                  max_len,
+                                                  nice_len,
                                                   c->max_search_depth,
-                                                  &next_hash,
+                                                  next_hashes,
                                                   &best_len,
                                                   cache_ptr);
                cache_ptr->length = cache_ptr - matches;
-               cache_ptr->offset = *in_next;
-               in_next++;
+               cache_ptr->offset = *in_next++;
                cache_ptr++;
+               max_len--;
+               nice_len = min(nice_len, max_len);
 
                /*
                 * If there was a very long match found, then don't cache any
@@ -954,24 +955,32 @@ xpress_find_matches(struct xpress_compressor * restrict c,
                 * very much.  If there's a long match, then the data must be
                 * highly compressible, so it doesn't matter as much what we do.
                 */
-               if (best_len >= c->nice_match_length) {
+               if (best_len >= nice_len) {
+                       if (unlikely(best_len +
+                                    BT_MATCHFINDER_REQUIRED_NBYTES >= max_len))
+                               break;
                        --best_len;
                        do {
                                bt_matchfinder_skip_position(&c->bt_mf,
                                                             in_begin,
-                                                            in_next,
-                                                            in_end,
-                                                            min(in_end - in_next,
-                                                                c->nice_match_length),
+                                                            in_next - in_begin,
+                                                            nice_len,
                                                             c->max_search_depth,
-                                                            &next_hash);
-
+                                                            next_hashes);
                                cache_ptr->length = 0;
                                cache_ptr->offset = *in_next++;
                                cache_ptr++;
+                               max_len--;
+                               nice_len = min(nice_len, max_len);
                        } while (--best_len);
                }
-       } while (in_next != in_end);
+       }
+
+       while (max_len--) {
+               cache_ptr->length = 0;
+               cache_ptr->offset = *in_next++;
+               cache_ptr++;
+       }
 
        return cache_ptr;
 }
@@ -1068,8 +1077,7 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
        if (max_bufsize > XPRESS_MAX_BUFSIZE)
                return WIMLIB_ERR_INVALID_PARAM;
 
-       c = ALIGNED_MALLOC(xpress_get_compressor_size(max_bufsize, compression_level),
-                          MATCHFINDER_ALIGNMENT);
+       c = MALLOC(xpress_get_compressor_size(max_bufsize, compression_level));
        if (!c)
                goto oom0;
 
@@ -1083,12 +1091,12 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
 
                if (compression_level < 30) {
                        c->impl = xpress_compress_greedy;
-                       c->max_search_depth = (compression_level * 24) / 16;
-                       c->nice_match_length = (compression_level * 48) / 16;
+                       c->max_search_depth = (compression_level * 30) / 16;
+                       c->nice_match_length = (compression_level * 60) / 16;
                } else {
                        c->impl = xpress_compress_lazy;
-                       c->max_search_depth = (compression_level * 24) / 32;
-                       c->nice_match_length = (compression_level * 48) / 32;
+                       c->max_search_depth = (compression_level * 30) / 32;
+                       c->nice_match_length = (compression_level * 60) / 32;
 
                        /* xpress_compress_lazy() needs max_search_depth >= 2
                         * because it halves the max_search_depth when
@@ -1115,8 +1123,8 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
                        &c->match_cache[max_bufsize * CACHE_RESERVE_PER_POS];
 
                c->impl = xpress_compress_near_optimal;
-               c->max_search_depth = (compression_level * 32) / 100;
-               c->nice_match_length = (compression_level * 50) / 100;
+               c->max_search_depth = (compression_level * 28) / 100;
+               c->nice_match_length = (compression_level * 56) / 100;
                c->num_optim_passes = compression_level / 40;
        }
 #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
@@ -1129,14 +1137,14 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
        return 0;
 
 oom1:
-       ALIGNED_FREE(c);
+       FREE(c);
 oom0:
        return WIMLIB_ERR_NOMEM;
 }
 
 static size_t
-xpress_compress(const void *in, size_t in_nbytes,
-               void *out, size_t out_nbytes_avail, void *_c)
+xpress_compress(const void *restrict in, size_t in_nbytes,
+               void *restrict out, size_t out_nbytes_avail, void *restrict _c)
 {
        struct xpress_compressor *c = _c;
 
@@ -1164,7 +1172,7 @@ xpress_free_compressor(void *_c)
        } else
 #endif
                FREE(c->chosen_items);
-       ALIGNED_FREE(c);
+       FREE(c);
 }
 
 const struct compressor_ops xpress_compressor_ops = {