diff --git a/include/wimlib/hc_matchfinder.h b/include/wimlib/hc_matchfinder.h
index e1da62b5ca1a388566419f98030705971e9e31d3..a96aa6511184ed7d99560b09c21f1a0664a1c394 100644
--- a/include/wimlib/hc_matchfinder.h
+++ b/include/wimlib/hc_matchfinder.h
@@ -1,11 +1,28 @@
 /*
- * hc_matchfinder.h
+ * hc_matchfinder.h - Lempel-Ziv matchfinding with a hash table of linked lists
  *
- * Author:     Eric Biggers
- * Year:       2014, 2015
+ * Copyright 2022 Eric Biggers
  *
- * The author dedicates this file to the public domain.
- * You can do whatever you want with this file.
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
  *
  * ---------------------------------------------------------------------------
  *
@@ -71,7 +88,7 @@
  * chain for length 3+ matches, the algorithm just checks for one close length 3
  * match, then focuses on finding length 4+ matches.
  *
- * The longest_match() and skip_positions() functions are inlined into the
+ * The longest_match() and skip_bytes() functions are inlined into the
  * compressors that use them.  This isn't just about saving the overhead of a
  * function call.  These functions are intended to be called from the inner
  * loops of compressors, where giving the compiler more control over register
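
To make the "hash table of linked lists" design described in the comment above concrete: each position in the input is hashed by the 4 bytes starting there, the hash bucket holds the most recent position with that hash, and a parallel next_tab[] links each position to the previous one in its bucket. Below is a minimal free-standing sketch (not wimlib's code; the names, table order, and hash multiplier are illustrative assumptions):

    #include <stdint.h>
    #include <string.h>

    #define TOY_HASH4_ORDER 16

    /* Insert position 'pos' into the chain for the 4 bytes starting there. */
    static void
    toy_hc_insert(uint32_t hash4_tab[1u << TOY_HASH4_ORDER], uint32_t next_tab[],
                  const uint8_t *in_begin, uint32_t pos)
    {
            uint32_t seq, hash;

            memcpy(&seq, in_begin + pos, 4);        /* read the next 4 bytes */
            hash = (seq * 0x9E3779B1u) >> (32 - TOY_HASH4_ORDER);

            next_tab[pos] = hash4_tab[hash];  /* link to previous position in this bucket */
            hash4_tab[hash] = pos;            /* bucket head = newest position */
    }

    /* Candidate matches are then visited newest-first by walking the chain:
     *
     *     for (cand = hash4_tab[hash]; cand != 0 && depth--; cand = next_tab[cand])
     *             ...compare the bytes at in_begin + cand against the current ones...
     */
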
 
 #include <string.h>
 
-#include "wimlib/lz_extend.h"
-#include "wimlib/lz_hash.h"
-#include "wimlib/unaligned.h"
+#include "wimlib/matchfinder_common.h"
 
-#define HC_MATCHFINDER_HASH3_ORDER     14
-#define HC_MATCHFINDER_HASH4_ORDER     15
+#define HC_MATCHFINDER_HASH3_ORDER     15
+#define HC_MATCHFINDER_HASH4_ORDER     16
 
 /* TEMPLATED functions and structures have MF_SUFFIX appended to their name.  */
 #undef TEMPLATED
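
For readers unfamiliar with the templating scheme referenced above: TEMPLATED() is typically defined by token pasting, roughly as below (a sketch; wimlib's exact definitions may differ). Including the header with MF_SUFFIX and mf_pos_t defined as, say, _16 and u16 then instantiates a 16-bit-position variant of every function and structure.

    #define CONCAT_IMPL(a, b)   a##b
    #define CONCAT(a, b)        CONCAT_IMPL(a, b)
    #define TEMPLATED(name)     CONCAT(name, MF_SUFFIX)

    /* e.g. TEMPLATED(hc_matchfinder_init) expands to hc_matchfinder_init_16
     * when MF_SUFFIX is defined as _16. */
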
@@ -131,7 +146,7 @@ struct TEMPLATED(hc_matchfinder) {
 
 /* Return the number of bytes that must be allocated for a 'hc_matchfinder' that
  * can work with buffers up to the specified size.  */
-static inline size_t
+static forceinline size_t
 TEMPLATED(hc_matchfinder_size)(size_t max_bufsize)
 {
        return sizeof(struct TEMPLATED(hc_matchfinder)) +
@@ -139,7 +154,7 @@ TEMPLATED(hc_matchfinder_size)(size_t max_bufsize)
 }
 
 /* Prepare the matchfinder for a new input buffer.  */
-static inline void
+static forceinline void
 TEMPLATED(hc_matchfinder_init)(struct TEMPLATED(hc_matchfinder) *mf)
 {
        memset(mf, 0, sizeof(*mf));
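
A caller would typically size, allocate, and (re)initialize the matchfinder along the lines below. This is a hypothetical sketch: the _16 suffix follows the templating example above, and any alignment requirement of the real allocation path is glossed over.

    struct hc_matchfinder_16 *mf = malloc(hc_matchfinder_size_16(max_bufsize));

    if (!mf)
            return false;

    /* Before compressing each new input buffer: */
    hc_matchfinder_init_16(mf);
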
@@ -152,9 +167,9 @@ TEMPLATED(hc_matchfinder_init)(struct TEMPLATED(hc_matchfinder) *mf)
  *     The matchfinder structure.
  * @in_begin
  *     Pointer to the beginning of the input buffer.
- * @cur_pos
- *     The current position in the input buffer (the position of the sequence
- *     being matched against).
+ * @in_next
+ *     Pointer to the next position in the input buffer, i.e. the sequence
+ *     being matched against.
  * @best_len
  *     Require a match longer than this length.
  * @max_len
@@ -174,26 +189,26 @@ TEMPLATED(hc_matchfinder_init)(struct TEMPLATED(hc_matchfinder) *mf)
  * Return the length of the match found, or 'best_len' if no match longer than
  * 'best_len' was found.
  */
-static inline u32
-TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const restrict mf,
-                                       const u8 * const restrict in_begin,
-                                       const ptrdiff_t cur_pos,
+static forceinline u32
+TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const mf,
+                                       const u8 * const in_begin,
+                                       const u8 * const in_next,
                                        u32 best_len,
                                        const u32 max_len,
                                        const u32 nice_len,
                                        const u32 max_search_depth,
-                                       u32 next_hashes[const restrict static 2],
-                                       u32 * const restrict offset_ret)
+                                       u32 * const next_hashes,
+                                       u32 * const offset_ret)
 {
-       const u8 *in_next = in_begin + cur_pos;
        u32 depth_remaining = max_search_depth;
-       const u8 *best_matchptr = best_matchptr; /* uninitialized */
+       const u8 *best_matchptr = in_next;
        mf_pos_t cur_node3, cur_node4;
        u32 hash3, hash4;
-       u32 next_seq3, next_seq4;
+       u32 next_hashseq;
        u32 seq4;
        const u8 *matchptr;
        u32 len;
+       u32 cur_pos = in_next - in_begin;
 
        if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */
                goto out;
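
For orientation, a greedy compressor might call the function roughly as follows. This is a hedged sketch, not wimlib's actual caller: the _16 suffix, emit_match()/emit_literal(), and the surrounding variables are assumptions, and max_len and nice_len are presumed already capped at the number of bytes remaining in the buffer.

    u32 next_hashes[2] = {0, 0};    /* hash state carried across calls */
    u32 offset;
    u32 len;

    len = hc_matchfinder_longest_match_16(mf, in_begin, in_next,
                                          2,        /* best_len: only report length >= 3 */
                                          max_len,
                                          nice_len,
                                          max_search_depth,
                                          next_hashes, &offset);
    if (len > 2) {
            emit_match(len, offset);        /* hypothetical: match 'offset' bytes back */
    } else {
            emit_literal(*in_next);         /* hypothetical: no usable match here */
    }
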
@@ -216,10 +231,9 @@ TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const
        mf->next_tab[cur_pos] = cur_node4;
 
        /* Compute the next hash codes.  */
-       next_seq4 = load_u32_unaligned(in_next + 1);
-       next_seq3 = loaded_u32_to_u24(next_seq4);
-       next_hashes[0] = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER);
-       next_hashes[1] = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER);
+       next_hashseq = get_unaligned_le32(in_next + 1);
+       next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
+       next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
        prefetchw(&mf->hash3_tab[next_hashes[0]]);
        prefetchw(&mf->hash4_tab[next_hashes[1]]);
 
@@ -329,54 +343,49 @@ out:
  *     The matchfinder structure.
  * @in_begin
  *     Pointer to the beginning of the input buffer.
- * @cur_pos
- *     The current position in the input buffer (the position of the sequence
- *     being matched against).
- * @end_pos
- *     The length of the input buffer.
+ * @in_next
+ *     Pointer to the next position in the input buffer.
+ * @in_end
+ *     Pointer to the end of the input buffer.
+ * @count
+ *     The number of bytes to advance.  Must be > 0.
  * @next_hashes
  *     The precomputed hash codes for the sequence beginning at @in_next.
 *     These will be used and then updated with the precomputed hash codes for
  *     the sequence beginning at @in_next + @count.
- * @count
- *     The number of bytes to advance.  Must be > 0.
- *
- * Returns @in_next + @count.
  */
-static inline const u8 *
-TEMPLATED(hc_matchfinder_skip_positions)(struct TEMPLATED(hc_matchfinder) * const restrict mf,
-                                        const u8 * const restrict in_begin,
-                                        const ptrdiff_t cur_pos,
-                                        const ptrdiff_t end_pos,
-                                        const u32 count,
-                                        u32 next_hashes[const restrict static 2])
+static forceinline void
+TEMPLATED(hc_matchfinder_skip_bytes)(struct TEMPLATED(hc_matchfinder) * const mf,
+                                    const u8 * const in_begin,
+                                    const u8 *in_next,
+                                    const u8 * const in_end,
+                                    const u32 count,
+                                    u32 * const next_hashes)
 {
-       const u8 *in_next = in_begin + cur_pos;
-       const u8 * const stop_ptr = in_next + count;
-
-       if (likely(count + 5 <= end_pos - cur_pos)) {
-               u32 hash3, hash4;
-               u32 next_seq3, next_seq4;
-
-               hash3 = next_hashes[0];
-               hash4 = next_hashes[1];
-               do {
-                       mf->hash3_tab[hash3] = in_next - in_begin;
-                       mf->next_tab[in_next - in_begin] = mf->hash4_tab[hash4];
-                       mf->hash4_tab[hash4] = in_next - in_begin;
-
-                       next_seq4 = load_u32_unaligned(++in_next);
-                       next_seq3 = loaded_u32_to_u24(next_seq4);
-                       hash3 = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER);
-                       hash4 = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER);
-
-               } while (in_next != stop_ptr);
-
-               prefetchw(&mf->hash3_tab[hash3]);
-               prefetchw(&mf->hash4_tab[hash4]);
-               next_hashes[0] = hash3;
-               next_hashes[1] = hash4;
-       }
+       u32 cur_pos;
+       u32 hash3, hash4;
+       u32 next_hashseq;
+       u32 remaining = count;
 
-       return stop_ptr;
+       if (unlikely(count + 5 > in_end - in_next))
+               return;
+
+       cur_pos = in_next - in_begin;
+       hash3 = next_hashes[0];
+       hash4 = next_hashes[1];
+       do {
+               mf->hash3_tab[hash3] = cur_pos;
+               mf->next_tab[cur_pos] = mf->hash4_tab[hash4];
+               mf->hash4_tab[hash4] = cur_pos;
+
+               next_hashseq = get_unaligned_le32(++in_next);
+               hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
+               hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
+               cur_pos++;
+       } while (--remaining);
+
+       prefetchw(&mf->hash3_tab[hash3]);
+       prefetchw(&mf->hash4_tab[hash4]);
+       next_hashes[0] = hash3;
+       next_hashes[1] = hash4;
 }
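
Continuing the caller sketch from above (same assumptions): longest_match() has already indexed the position at in_next itself, so after accepting a match of length len, the compressor indexes only the remaining len - 1 covered positions and then advances past the match:

    hc_matchfinder_skip_bytes_16(mf, in_begin, in_next + 1, in_end,
                                 len - 1, next_hashes);
    in_next += len;
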