]> wimlib.net Git - wimlib/commitdiff
hc_matchfinder: sync with libdeflate
authorEric Biggers <ebiggers3@gmail.com>
Sun, 31 Jul 2022 02:03:42 +0000 (19:03 -0700)
committerEric Biggers <ebiggers3@gmail.com>
Sun, 31 Jul 2022 02:03:42 +0000 (19:03 -0700)
include/wimlib/hc_matchfinder.h
src/lzx_compress.c
src/xpress_compress.c

index a790c6ad04026610d21058aac55bad7b7e107aca..2c28a64633c54b93abe883a157b7728e0a92313c 100644 (file)
@@ -88,7 +88,7 @@
  * chain for length 3+ matches, the algorithm just checks for one close length 3
  * match, then focuses on finding length 4+ matches.
  *
  * chain for length 3+ matches, the algorithm just checks for one close length 3
  * match, then focuses on finding length 4+ matches.
  *
- * The longest_match() and skip_positions() functions are inlined into the
+ * The longest_match() and skip_bytes() functions are inlined into the
  * compressors that use them.  This isn't just about saving the overhead of a
  * function call.  These functions are intended to be called from the inner
  * loops of compressors, where giving the compiler more control over register
  * compressors that use them.  This isn't just about saving the overhead of a
  * function call.  These functions are intended to be called from the inner
  * loops of compressors, where giving the compiler more control over register
 #include "wimlib/lz_hash.h"
 #include "wimlib/unaligned.h"
 
 #include "wimlib/lz_hash.h"
 #include "wimlib/unaligned.h"
 
-#define HC_MATCHFINDER_HASH3_ORDER     14
-#define HC_MATCHFINDER_HASH4_ORDER     15
+#define HC_MATCHFINDER_HASH3_ORDER     15
+#define HC_MATCHFINDER_HASH4_ORDER     16
 
 /* TEMPLATED functions and structures have MF_SUFFIX appended to their name.  */
 #undef TEMPLATED
 
 /* TEMPLATED functions and structures have MF_SUFFIX appended to their name.  */
 #undef TEMPLATED
@@ -169,9 +169,9 @@ TEMPLATED(hc_matchfinder_init)(struct TEMPLATED(hc_matchfinder) *mf)
  *     The matchfinder structure.
  * @in_begin
  *     Pointer to the beginning of the input buffer.
  *     The matchfinder structure.
  * @in_begin
  *     Pointer to the beginning of the input buffer.
- * @cur_pos
- *     The current position in the input buffer (the position of the sequence
- *     being matched against).
+ * @in_next
+ *     Pointer to the next position in the input buffer, i.e. the sequence
+ *     being matched against.
  * @best_len
  *     Require a match longer than this length.
  * @max_len
  * @best_len
  *     Require a match longer than this length.
  * @max_len
@@ -192,25 +192,25 @@ TEMPLATED(hc_matchfinder_init)(struct TEMPLATED(hc_matchfinder) *mf)
  * 'best_len' was found.
  */
 static forceinline u32
  * 'best_len' was found.
  */
 static forceinline u32
-TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const restrict mf,
-                                       const u8 * const restrict in_begin,
-                                       const ptrdiff_t cur_pos,
+TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const mf,
+                                       const u8 * const in_begin,
+                                       const u8 * const in_next,
                                        u32 best_len,
                                        const u32 max_len,
                                        const u32 nice_len,
                                        const u32 max_search_depth,
                                        u32 best_len,
                                        const u32 max_len,
                                        const u32 nice_len,
                                        const u32 max_search_depth,
-                                       u32 next_hashes[const restrict static 2],
-                                       u32 * const restrict offset_ret)
+                                       u32 * const next_hashes,
+                                       u32 * const offset_ret)
 {
 {
-       const u8 *in_next = in_begin + cur_pos;
        u32 depth_remaining = max_search_depth;
        const u8 *best_matchptr = in_next;
        mf_pos_t cur_node3, cur_node4;
        u32 hash3, hash4;
        u32 depth_remaining = max_search_depth;
        const u8 *best_matchptr = in_next;
        mf_pos_t cur_node3, cur_node4;
        u32 hash3, hash4;
-       u32 next_seq3, next_seq4;
+       u32 next_hashseq;
        u32 seq4;
        const u8 *matchptr;
        u32 len;
        u32 seq4;
        const u8 *matchptr;
        u32 len;
+       u32 cur_pos = in_next - in_begin;
 
        if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */
                goto out;
 
        if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */
                goto out;
@@ -233,10 +233,9 @@ TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const
        mf->next_tab[cur_pos] = cur_node4;
 
        /* Compute the next hash codes.  */
        mf->next_tab[cur_pos] = cur_node4;
 
        /* Compute the next hash codes.  */
-       next_seq4 = load_u32_unaligned(in_next + 1);
-       next_seq3 = loaded_u32_to_u24(next_seq4);
-       next_hashes[0] = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER);
-       next_hashes[1] = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER);
+       next_hashseq = get_unaligned_le32(in_next + 1);
+       next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
+       next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
        prefetchw(&mf->hash3_tab[next_hashes[0]]);
        prefetchw(&mf->hash4_tab[next_hashes[1]]);
 
        prefetchw(&mf->hash3_tab[next_hashes[0]]);
        prefetchw(&mf->hash4_tab[next_hashes[1]]);
 
@@ -346,54 +345,49 @@ out:
  *     The matchfinder structure.
  * @in_begin
  *     Pointer to the beginning of the input buffer.
  *     The matchfinder structure.
  * @in_begin
  *     Pointer to the beginning of the input buffer.
- * @cur_pos
- *     The current position in the input buffer (the position of the sequence
- *     being matched against).
- * @end_pos
- *     The length of the input buffer.
+ * @in_next
+ *     Pointer to the next position in the input buffer.
+ * @in_end
+ *     Pointer to the end of the input buffer.
+ * @count
+ *     The number of bytes to advance.  Must be > 0.
  * @next_hashes
  *     The precomputed hash codes for the sequence beginning at @in_next.
  *     These will be used and then updated with the precomputed hashcodes for
  *     the sequence beginning at @in_next + @count.
  * @next_hashes
  *     The precomputed hash codes for the sequence beginning at @in_next.
  *     These will be used and then updated with the precomputed hashcodes for
  *     the sequence beginning at @in_next + @count.
- * @count
- *     The number of bytes to advance.  Must be > 0.
- *
- * Returns @in_next + @count.
  */
  */
-static forceinline const u8 *
-TEMPLATED(hc_matchfinder_skip_positions)(struct TEMPLATED(hc_matchfinder) * const restrict mf,
-                                        const u8 * const restrict in_begin,
-                                        const ptrdiff_t cur_pos,
-                                        const ptrdiff_t end_pos,
-                                        const u32 count,
-                                        u32 next_hashes[const restrict static 2])
+static forceinline void
+TEMPLATED(hc_matchfinder_skip_bytes)(struct TEMPLATED(hc_matchfinder) * const mf,
+                                    const u8 * const in_begin,
+                                    const u8 *in_next,
+                                    const u8 * const in_end,
+                                    const u32 count,
+                                    u32 * const next_hashes)
 {
 {
-       const u8 *in_next = in_begin + cur_pos;
-       const u8 * const stop_ptr = in_next + count;
-
-       if (likely(count + 5 <= end_pos - cur_pos)) {
-               u32 hash3, hash4;
-               u32 next_seq3, next_seq4;
-
-               hash3 = next_hashes[0];
-               hash4 = next_hashes[1];
-               do {
-                       mf->hash3_tab[hash3] = in_next - in_begin;
-                       mf->next_tab[in_next - in_begin] = mf->hash4_tab[hash4];
-                       mf->hash4_tab[hash4] = in_next - in_begin;
-
-                       next_seq4 = load_u32_unaligned(++in_next);
-                       next_seq3 = loaded_u32_to_u24(next_seq4);
-                       hash3 = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER);
-                       hash4 = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER);
-
-               } while (in_next != stop_ptr);
-
-               prefetchw(&mf->hash3_tab[hash3]);
-               prefetchw(&mf->hash4_tab[hash4]);
-               next_hashes[0] = hash3;
-               next_hashes[1] = hash4;
-       }
+       u32 cur_pos;
+       u32 hash3, hash4;
+       u32 next_hashseq;
+       u32 remaining = count;
+
+       if (unlikely(count + 5 > in_end - in_next))
+               return;
 
 
-       return stop_ptr;
+       cur_pos = in_next - in_begin;
+       hash3 = next_hashes[0];
+       hash4 = next_hashes[1];
+       do {
+               mf->hash3_tab[hash3] = cur_pos;
+               mf->next_tab[cur_pos] = mf->hash4_tab[hash4];
+               mf->hash4_tab[hash4] = cur_pos;
+
+               next_hashseq = get_unaligned_le32(++in_next);
+               hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
+               hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
+               cur_pos++;
+       } while (--remaining);
+
+       prefetchw(&mf->hash3_tab[hash3]);
+       prefetchw(&mf->hash4_tab[hash4]);
+       next_hashes[0] = hash3;
+       next_hashes[1] = hash4;
 }
 }
index 110db5dd3fc7eaf87a2e2d1991bc1d174bebd7b1..89031d4e6b2dafee3f209c52479f9b30f1b9e717 100644 (file)
@@ -2569,7 +2569,7 @@ lzx_compress_lazy(struct lzx_compressor * restrict c,
                        cur_len = CALL_HC_MF(is_16_bit, c,
                                             hc_matchfinder_longest_match,
                                             in_begin,
                        cur_len = CALL_HC_MF(is_16_bit, c,
                                             hc_matchfinder_longest_match,
                                             in_begin,
-                                            in_next - in_begin,
+                                            in_next,
                                             2,
                                             max_len,
                                             nice_len,
                                             2,
                                             max_len,
                                             nice_len,
@@ -2646,7 +2646,7 @@ lzx_compress_lazy(struct lzx_compressor * restrict c,
                        next_len = CALL_HC_MF(is_16_bit, c,
                                              hc_matchfinder_longest_match,
                                              in_begin,
                        next_len = CALL_HC_MF(is_16_bit, c,
                                              hc_matchfinder_longest_match,
                                              in_begin,
-                                             in_next - in_begin,
+                                             in_next,
                                              cur_len - 2,
                                              max_len,
                                              nice_len,
                                              cur_len - 2,
                                              max_len,
                                              nice_len,
@@ -2707,13 +2707,14 @@ lzx_compress_lazy(struct lzx_compressor * restrict c,
                        lzx_choose_match(c, cur_len, cur_adjusted_offset,
                                         recent_offsets, is_16_bit,
                                         &litrunlen, &next_seq);
                        lzx_choose_match(c, cur_len, cur_adjusted_offset,
                                         recent_offsets, is_16_bit,
                                         &litrunlen, &next_seq);
-                       in_next = CALL_HC_MF(is_16_bit, c,
-                                            hc_matchfinder_skip_positions,
-                                            in_begin,
-                                            in_next - in_begin,
-                                            in_end - in_begin,
-                                            skip_len,
-                                            next_hashes);
+                       CALL_HC_MF(is_16_bit, c,
+                                  hc_matchfinder_skip_bytes,
+                                  in_begin,
+                                  in_next,
+                                  in_end,
+                                  skip_len,
+                                  next_hashes);
+                       in_next += skip_len;
 
                        /* Keep going until it's time to end the block. */
                } while (in_next < in_max_block_end &&
 
                        /* Keep going until it's time to end the block. */
                } while (in_next < in_max_block_end &&
index 1b430912de7521d5b2abb622f6b1a694f7ab6281..379388cd3602b0c1fa2d4b5ab48358100cc78a04 100644 (file)
@@ -544,7 +544,7 @@ xpress_compress_greedy(struct xpress_compressor * restrict c,
 
                length = hc_matchfinder_longest_match(&c->hc_mf,
                                                      in_begin,
 
                length = hc_matchfinder_longest_match(&c->hc_mf,
                                                      in_begin,
-                                                     in_next - in_begin,
+                                                     in_next,
                                                      XPRESS_MIN_MATCH_LEN - 1,
                                                      in_end - in_next,
                                                      min(in_end - in_next, c->nice_match_length),
                                                      XPRESS_MIN_MATCH_LEN - 1,
                                                      in_end - in_next,
                                                      min(in_end - in_next, c->nice_match_length),
@@ -558,12 +558,12 @@ xpress_compress_greedy(struct xpress_compressor * restrict c,
                        *next_chosen_item++ =
                                xpress_record_match(c, length, offset);
                        in_next += 1;
                        *next_chosen_item++ =
                                xpress_record_match(c, length, offset);
                        in_next += 1;
-                       hc_matchfinder_skip_positions(&c->hc_mf,
-                                                     in_begin,
-                                                     in_next - in_begin,
-                                                     in_end - in_begin,
-                                                     length - 1,
-                                                     next_hashes);
+                       hc_matchfinder_skip_bytes(&c->hc_mf,
+                                                 in_begin,
+                                                 in_next,
+                                                 in_end,
+                                                 length - 1,
+                                                 next_hashes);
                        in_next += length - 1;
                } else {
                        /* No match found  */
                        in_next += length - 1;
                } else {
                        /* No match found  */
@@ -610,7 +610,7 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                /* Find the longest match at the current position.  */
                cur_len = hc_matchfinder_longest_match(&c->hc_mf,
                                                       in_begin,
                /* Find the longest match at the current position.  */
                cur_len = hc_matchfinder_longest_match(&c->hc_mf,
                                                       in_begin,
-                                                      in_next - in_begin,
+                                                      in_next,
                                                       XPRESS_MIN_MATCH_LEN - 1,
                                                       in_end - in_next,
                                                       min(in_end - in_next, c->nice_match_length),
                                                       XPRESS_MIN_MATCH_LEN - 1,
                                                       in_end - in_next,
                                                       min(in_end - in_next, c->nice_match_length),
@@ -638,12 +638,12 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                        *next_chosen_item++ =
                                xpress_record_match(c, cur_len, cur_offset);
 
                        *next_chosen_item++ =
                                xpress_record_match(c, cur_len, cur_offset);
 
-                       hc_matchfinder_skip_positions(&c->hc_mf,
-                                                     in_begin,
-                                                     in_next - in_begin,
-                                                     in_end - in_begin,
-                                                     cur_len - 1,
-                                                     next_hashes);
+                       hc_matchfinder_skip_bytes(&c->hc_mf,
+                                                 in_begin,
+                                                 in_next,
+                                                 in_end,
+                                                 cur_len - 1,
+                                                 next_hashes);
                        in_next += cur_len - 1;
                        continue;
                }
                        in_next += cur_len - 1;
                        continue;
                }
@@ -666,7 +666,7 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                 */
                next_len = hc_matchfinder_longest_match(&c->hc_mf,
                                                        in_begin,
                 */
                next_len = hc_matchfinder_longest_match(&c->hc_mf,
                                                        in_begin,
-                                                       in_next - in_begin,
+                                                       in_next,
                                                        cur_len,
                                                        in_end - in_next,
                                                        min(in_end - in_next, c->nice_match_length),
                                                        cur_len,
                                                        in_end - in_next,
                                                        min(in_end - in_next, c->nice_match_length),
@@ -688,12 +688,12 @@ xpress_compress_lazy(struct xpress_compressor * restrict c,
                         * output the current match.  */
                        *next_chosen_item++ =
                                xpress_record_match(c, cur_len, cur_offset);
                         * output the current match.  */
                        *next_chosen_item++ =
                                xpress_record_match(c, cur_len, cur_offset);
-                       hc_matchfinder_skip_positions(&c->hc_mf,
-                                                     in_begin,
-                                                     in_next - in_begin,
-                                                     in_end - in_begin,
-                                                     cur_len - 2,
-                                                     next_hashes);
+                       hc_matchfinder_skip_bytes(&c->hc_mf,
+                                                 in_begin,
+                                                 in_next,
+                                                 in_end,
+                                                 cur_len - 2,
+                                                 next_hashes);
                        in_next += cur_len - 2;
                        continue;
                }
                        in_next += cur_len - 2;
                        continue;
                }