]> wimlib.net Git - wimlib/blobdiff - src/lz_hash_chains.c
win32_capture.c: Ignore unnamed data stream of reparse point
[wimlib] / src / lz_hash_chains.c
index 7eacac915d71dad4fe9343071b9e2f9635d7c2a4..46ea8130196305c2b2eff097526301b51d4f36cc 100644 (file)
@@ -162,91 +162,94 @@ lz_hc_load_window(struct lz_mf *_mf, const u8 window[], u32 size)
                mf->next_hash = lz_hc_hash(window);
 }
 
-static inline u32
-do_search(const u8 * restrict window,
-         const u32 cur_window_pos,
-         u32 prev_tab[restrict],
-         u32 cur_match,
-         const u32 min_len,
-         const u32 max_len,
-         const u32 max_search_depth,
-         struct lz_match matches[restrict])
-{
-       const u8 * const strptr = &window[cur_window_pos];
-       u32 best_len = min_len - 1;
-       u32 depth_remaining = max_search_depth;
-       u32 num_matches = 0;
-
-       for (; cur_match && depth_remaining--; cur_match = prev_tab[cur_match]) {
-
-               const u8 * const matchptr = &window[cur_match];
-
-               if (matchptr[best_len] == strptr[best_len] &&
-                   matchptr[best_len - 1] == strptr[best_len - 1] &&
-                   matchptr[0] == strptr[0])
-               {
-                       u32 len = 0;
-
-                       while (++len != max_len)
-                               if (matchptr[len] != strptr[len])
-                                       break;
-
-                       if (len > best_len) {
-                               matches[num_matches++] = (struct lz_match) {
-                                       .len = len,
-                                       .offset = strptr - matchptr,
-                               };
-                               best_len = len;
-                               if (best_len == max_len)
-                                       break;
-                       }
-               }
-       }
-       return num_matches;
-}
-
 static u32
 lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
 {
        struct lz_hc *mf = (struct lz_hc *)_mf;
-       const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
+       const u8 * const window = mf->base.cur_window;
+       const u32 cur_pos = mf->base.cur_window_pos;
+       const u8 * const strptr = &window[cur_pos];
+       const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
+       u32 * const prev_tab = mf->prev_tab;
+       const u32 nice_len = min(bytes_remaining, mf->base.params.nice_match_len);
+       u32 best_len = mf->base.params.min_match_len - 1;
+       u32 depth_remaining = mf->base.params.max_search_depth;
+       u32 num_matches = 0;
        u32 hash;
        u32 cur_match;
-       u32 num_matches = 0;
 
-       if (bytes_remaining <= LZ_HC_HASH_BYTES)
+       if (unlikely(bytes_remaining <= mf->base.params.min_match_len))
                goto out;
 
+       /* Insert the current position into the appropriate hash chain and set
+        * 'cur_match' to the previous head.
+        *
+        * For a slight performance improvement, we do each hash calculation one
+        * position in advance and prefetch the necessary hash table entry.  */
+
        hash = mf->next_hash;
-       mf->next_hash = lz_hc_hash(lz_mf_get_window_ptr(&mf->base) + 1);
+       mf->next_hash = lz_hc_hash(strptr + 1);
        prefetch(&mf->hash_tab[mf->next_hash]);
        cur_match = mf->hash_tab[hash];
-       mf->hash_tab[hash] = mf->base.cur_window_pos;
-       mf->prev_tab[mf->base.cur_window_pos] = cur_match;
-
-       num_matches = do_search(mf->base.cur_window,
-                               mf->base.cur_window_pos,
-                               mf->prev_tab,
-                               cur_match,
-                               mf->base.params.min_match_len,
-                               min(bytes_remaining, mf->base.params.nice_match_len),
-                               mf->base.params.max_search_depth,
-                               matches);
-
-       /* If the longest match is @nice_match_len in length, it may have been
-        * truncated.  Try extending it up to the maximum match length.  */
-       if (num_matches != 0 &&
-           matches[num_matches - 1].len == mf->base.params.nice_match_len)
-       {
-               const u8 * const strptr = lz_mf_get_window_ptr(&mf->base);
-               const u8 * const matchptr = strptr - matches[num_matches - 1].offset;
-               const u32 len_limit = min(bytes_remaining, mf->base.params.max_match_len);
+       mf->hash_tab[hash] = cur_pos;
+       prev_tab[cur_pos] = cur_match;
+
+       for (; cur_match && depth_remaining--; cur_match = prev_tab[cur_match]) {
+
+               const u8 * const matchptr = &window[cur_match];
                u32 len;
 
-               len = matches[num_matches - 1].len;
-               while (len < len_limit && strptr[len] == matchptr[len])
-                       len++;
-               matches[num_matches - 1].len = len;
+               /* Considering a match at 'matchptr'.  */
+
+               /* The bytes at index 'best_len' are the most likely to differ,
+                * so check them first.
+                *
+                * The bytes at indices 'best_len - 1' and '0' are less
+                * important to check separately.  But doing so still gives a
+                * slight performance improvement, probably because they create
+                * separate branches for the CPU to predict independently of the
+                * branches in the main comparison loops.  */
+               if (matchptr[best_len] != strptr[best_len] ||
+                   matchptr[best_len - 1] != strptr[best_len - 1] ||
+                   matchptr[0] != strptr[0])
+                       goto next_match;
+
+               for (len = 1; len < best_len - 1; len++)
+                       if (matchptr[len] != strptr[len])
+                               goto next_match;
+
+               /* We now know the match length is at least 'best_len + 1'.  */
+
+               len = best_len;
+
+               do {
+                       if (++len == nice_len) {
+                               /* 'nice_len' reached; don't waste time
+                                * searching for longer matches.  Extend the
+                                * match as far as possible, record it, and
+                                * return.  */
+                               const u32 max_len = min(bytes_remaining,
+                                                       mf->base.params.max_match_len);
+                               while (len < max_len && strptr[len] == matchptr[len])
+                                       len++;
+                               matches[num_matches++] = (struct lz_match) {
+                                       .len = len,
+                                       .offset = strptr - matchptr,
+                               };
+                               goto out;
+                       }
+               } while (matchptr[len] == strptr[len]);
+
+               /* Found a longer match, but 'nice_len' not yet reached.  */
+               best_len = len;
+               matches[num_matches++] = (struct lz_match) {
+                       .len = len,
+                       .offset = strptr - matchptr,
+               };
+
+       next_match:
+               /* Continue to next match in the chain.  */
+               ;
        }
 
 out:
@@ -260,7 +263,7 @@ lz_hc_skip_position(struct lz_hc *mf)
        const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
        u32 hash;
 
-       if (bytes_remaining <= LZ_HC_HASH_BYTES)
+       if (unlikely(bytes_remaining <= mf->base.params.min_match_len))
                goto out;
 
        hash = mf->next_hash;