extract.c: Fix extract_from_tmpfile()

[wimlib] / src / lz_hash_chains.c
diff --git a/src/lz_hash_chains.c b/src/lz_hash_chains.c

index 012d7c462cbdb4f92172d2bb30e5ab158483b149..46ea8130196305c2b2eff097526301b51d4f36cc 100644 (file)
--- a/src/lz_hash_chains.c
+++ b/src/lz_hash_chains.c
@@ -162,26 +162,53 @@ lz_hc_load_window(struct lz_mf *_mf, const u8 window[], u32 size)
                 mf->next_hash = lz_hc_hash(window);
  }
  
-static inline u32
-do_search(const u8 * restrict window,
-         const u32 cur_window_pos,
-         u32 prev_tab[restrict],
-         u32 cur_match,
-         const u32 min_len,
-         const u32 max_len,
-         const u32 max_search_depth,
-         struct lz_match matches[restrict])
+static u32
+lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
  {
-       const u8 * const strptr = &window[cur_window_pos];
-       u32 best_len = min_len - 1;
-       u32 depth_remaining = max_search_depth;
+       struct lz_hc *mf = (struct lz_hc *)_mf;
+       const u8 * const window = mf->base.cur_window;
+       const u32 cur_pos = mf->base.cur_window_pos;
+       const u8 * const strptr = &window[cur_pos];
+       const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
+       u32 * const prev_tab = mf->prev_tab;
+       const u32 nice_len = min(bytes_remaining, mf->base.params.nice_match_len);
+       u32 best_len = mf->base.params.min_match_len - 1;
+       u32 depth_remaining = mf->base.params.max_search_depth;
         u32 num_matches = 0;
+       u32 hash;
+       u32 cur_match;
+
+       if (unlikely(bytes_remaining <= mf->base.params.min_match_len))
+               goto out;
+
+       /* Insert the current position into the appropriate hash chain and set
+        * 'cur_match' to the previous head.
+        *
+        * For a slight performance improvement, we do each hash calculation one
+        * position in advance and prefetch the necessary hash table entry.  */
+
+       hash = mf->next_hash;
+       mf->next_hash = lz_hc_hash(strptr + 1);
+       prefetch(&mf->hash_tab[mf->next_hash]);
+       cur_match = mf->hash_tab[hash];
+       mf->hash_tab[hash] = cur_pos;
+       prev_tab[cur_pos] = cur_match;
  
         for (; cur_match && depth_remaining--; cur_match = prev_tab[cur_match]) {
  
                 const u8 * const matchptr = &window[cur_match];
                 u32 len;
  
+               /* Considering a match at 'matchptr'.  */
+
+               /* The bytes at index 'best_len' are the most likely to differ,
+                * so check them first.
+                *
+                * The bytes at indices 'best_len - 1' and '0' are less
+                * important to check separately.  But doing so still gives a
+                * slight performance improvement, probably because they create
+                * separate branches for the CPU to predict independently of the
+                * branches in the main comparison loops.  */
                 if (matchptr[best_len] != strptr[best_len] ||
                     matchptr[best_len - 1] != strptr[best_len - 1] ||
                     matchptr[0] != strptr[0])
@@ -191,68 +218,39 @@ do_search(const u8 * restrict window,
                         if (matchptr[len] != strptr[len])
                                 goto next_match;
  
-               len = best_len;
+               /* We now know the match length is at least 'best_len + 1'.  */
  
-               while (++len != max_len)
-                       if (matchptr[len] != strptr[len])
-                               break;
+               len = best_len;
  
+               do {
+                       if (++len == nice_len) {
+                               /* 'nice_len' reached; don't waste time
+                                * searching for longer matches.  Extend the
+                                * match as far as possible, record it, and
+                                * return.  */
+                               const u32 max_len = min(bytes_remaining,
+                                                       mf->base.params.max_match_len);
+                               while (len < max_len && strptr[len] == matchptr[len])
+                                       len++;
+                               matches[num_matches++] = (struct lz_match) {
+                                       .len = len,
+                                       .offset = strptr - matchptr,
+                               };
+                               goto out;
+                       }
+               } while (matchptr[len] == strptr[len]);
+
+               /* Found a longer match, but 'nice_len' not yet reached.  */
+               best_len = len;
                 matches[num_matches++] = (struct lz_match) {
                         .len = len,
                         .offset = strptr - matchptr,
                 };
-               best_len = len;
-               if (best_len == max_len)
-                       break;
+
         next_match:
+               /* Continue to next match in the chain.  */
                 ;
         }
-       return num_matches;
-}
-
-static u32
-lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
-{
-       struct lz_hc *mf = (struct lz_hc *)_mf;
-       const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
-       u32 hash;
-       u32 cur_match;
-       u32 num_matches = 0;
-
-       if (bytes_remaining <= LZ_HC_HASH_BYTES)
-               goto out;
-
-       hash = mf->next_hash;
-       mf->next_hash = lz_hc_hash(lz_mf_get_window_ptr(&mf->base) + 1);
-       prefetch(&mf->hash_tab[mf->next_hash]);
-       cur_match = mf->hash_tab[hash];
-       mf->hash_tab[hash] = mf->base.cur_window_pos;
-       mf->prev_tab[mf->base.cur_window_pos] = cur_match;
-
-       num_matches = do_search(mf->base.cur_window,
-                               mf->base.cur_window_pos,
-                               mf->prev_tab,
-                               cur_match,
-                               mf->base.params.min_match_len,
-                               min(bytes_remaining, mf->base.params.nice_match_len),
-                               mf->base.params.max_search_depth,
-                               matches);
-
-       /* If the longest match is @nice_match_len in length, it may have been
-        * truncated.  Try extending it up to the maximum match length.  */
-       if (num_matches != 0 &&
-           matches[num_matches - 1].len == mf->base.params.nice_match_len)
-       {
-               const u8 * const strptr = lz_mf_get_window_ptr(&mf->base);
-               const u8 * const matchptr = strptr - matches[num_matches - 1].offset;
-               const u32 len_limit = min(bytes_remaining, mf->base.params.max_match_len);
-               u32 len;
-
-               len = matches[num_matches - 1].len;
-               while (len < len_limit && strptr[len] == matchptr[len])
-                       len++;
-               matches[num_matches - 1].len = len;
-       }
  
  out:
         mf->base.cur_window_pos++;
@@ -265,7 +263,7 @@ lz_hc_skip_position(struct lz_hc *mf)
         const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
         u32 hash;
  
-       if (bytes_remaining <= LZ_HC_HASH_BYTES)
+       if (unlikely(bytes_remaining <= mf->base.params.min_match_len))
                 goto out;
  
         hash = mf->next_hash;