mf->next_hash = lz_hc_hash(window);
}
-static inline u32
-do_search(const u8 * restrict window,
- const u32 cur_window_pos,
- u32 prev_tab[restrict],
- u32 cur_match,
- const u32 min_len,
- const u32 max_len,
- const u32 max_search_depth,
- struct lz_match matches[restrict])
+static u32
+lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
{
- const u8 * const strptr = &window[cur_window_pos];
- u32 best_len = min_len - 1;
- u32 depth_remaining = max_search_depth;
+ struct lz_hc *mf = (struct lz_hc *)_mf;
+ const u8 * const window = mf->base.cur_window;
+ const u32 cur_pos = mf->base.cur_window_pos;
+ const u8 * const strptr = &window[cur_pos];
+ const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
+ u32 * const prev_tab = mf->prev_tab;
+ const u32 nice_len = min(bytes_remaining, mf->base.params.nice_match_len);
+ u32 best_len = mf->base.params.min_match_len - 1;
+ u32 depth_remaining = mf->base.params.max_search_depth;
u32 num_matches = 0;
+ u32 hash;
+ u32 cur_match;
+
+ if (unlikely(bytes_remaining <= mf->base.params.min_match_len))
+ goto out;
+
+ /* Insert the current position into the appropriate hash chain and set
+ * 'cur_match' to the previous head.
+ *
+ * For a slight performance improvement, we do each hash calculation one
+ * position in advance and prefetch the necessary hash table entry. */
+
+ hash = mf->next_hash;
+ mf->next_hash = lz_hc_hash(strptr + 1);
+ prefetch(&mf->hash_tab[mf->next_hash]);
+ cur_match = mf->hash_tab[hash];
+ mf->hash_tab[hash] = cur_pos;
+ prev_tab[cur_pos] = cur_match;
for (; cur_match && depth_remaining--; cur_match = prev_tab[cur_match]) {
const u8 * const matchptr = &window[cur_match];
u32 len;
+ /* Considering a match at 'matchptr'. */
+
+ /* The bytes at index 'best_len' are the most likely to differ,
+ * so check them first.
+ *
+ * The bytes at indices 'best_len - 1' and '0' are less
+ * important to check separately. But doing so still gives a
+ * slight performance improvement, probably because they create
+ * separate branches for the CPU to predict independently of the
+ * branches in the main comparison loops. */
if (matchptr[best_len] != strptr[best_len] ||
matchptr[best_len - 1] != strptr[best_len - 1] ||
matchptr[0] != strptr[0])
if (matchptr[len] != strptr[len])
goto next_match;
- len = best_len;
+ /* We now know the match length is at least 'best_len + 1'. */
- while (++len != max_len)
- if (matchptr[len] != strptr[len])
- break;
+ len = best_len;
+ do {
+ if (++len == nice_len) {
+ /* 'nice_len' reached; don't waste time
+ * searching for longer matches. Extend the
+ * match as far as possible, record it, and
+ * return. */
+ const u32 max_len = min(bytes_remaining,
+ mf->base.params.max_match_len);
+ while (len < max_len && strptr[len] == matchptr[len])
+ len++;
+ matches[num_matches++] = (struct lz_match) {
+ .len = len,
+ .offset = strptr - matchptr,
+ };
+ goto out;
+ }
+ } while (matchptr[len] == strptr[len]);
+
+ /* Found a longer match, but 'nice_len' not yet reached. */
+ best_len = len;
matches[num_matches++] = (struct lz_match) {
.len = len,
.offset = strptr - matchptr,
};
- best_len = len;
- if (best_len == max_len)
- break;
+
next_match:
+ /* Continue to next match in the chain. */
;
}
- return num_matches;
-}
-
-static u32
-lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
-{
- struct lz_hc *mf = (struct lz_hc *)_mf;
- const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
- u32 hash;
- u32 cur_match;
- u32 num_matches = 0;
-
- if (bytes_remaining <= LZ_HC_HASH_BYTES)
- goto out;
-
- hash = mf->next_hash;
- mf->next_hash = lz_hc_hash(lz_mf_get_window_ptr(&mf->base) + 1);
- prefetch(&mf->hash_tab[mf->next_hash]);
- cur_match = mf->hash_tab[hash];
- mf->hash_tab[hash] = mf->base.cur_window_pos;
- mf->prev_tab[mf->base.cur_window_pos] = cur_match;
-
- num_matches = do_search(mf->base.cur_window,
- mf->base.cur_window_pos,
- mf->prev_tab,
- cur_match,
- mf->base.params.min_match_len,
- min(bytes_remaining, mf->base.params.nice_match_len),
- mf->base.params.max_search_depth,
- matches);
-
- /* If the longest match is @nice_match_len in length, it may have been
- * truncated. Try extending it up to the maximum match length. */
- if (num_matches != 0 &&
- matches[num_matches - 1].len == mf->base.params.nice_match_len)
- {
- const u8 * const strptr = lz_mf_get_window_ptr(&mf->base);
- const u8 * const matchptr = strptr - matches[num_matches - 1].offset;
- const u32 len_limit = min(bytes_remaining, mf->base.params.max_match_len);
- u32 len;
-
- len = matches[num_matches - 1].len;
- while (len < len_limit && strptr[len] == matchptr[len])
- len++;
- matches[num_matches - 1].len = len;
- }
out:
mf->base.cur_window_pos++;
const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
u32 hash;
- if (bytes_remaining <= LZ_HC_HASH_BYTES)
+ if (unlikely(bytes_remaining <= mf->base.params.min_match_len))
goto out;
hash = mf->next_hash;