From: Eric Biggers Date: Sun, 31 Jul 2022 02:03:42 +0000 (-0700) Subject: hc_matchfinder: sync with libdeflate X-Git-Tag: v1.13.6~13 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=7976ce465b28b86b4cee954a8116aae435058186 hc_matchfinder: sync with libdeflate --- diff --git a/include/wimlib/hc_matchfinder.h b/include/wimlib/hc_matchfinder.h index a790c6ad..2c28a646 100644 --- a/include/wimlib/hc_matchfinder.h +++ b/include/wimlib/hc_matchfinder.h @@ -88,7 +88,7 @@ * chain for length 3+ matches, the algorithm just checks for one close length 3 * match, then focuses on finding length 4+ matches. * - * The longest_match() and skip_positions() functions are inlined into the + * The longest_match() and skip_bytes() functions are inlined into the * compressors that use them. This isn't just about saving the overhead of a * function call. These functions are intended to be called from the inner * loops of compressors, where giving the compiler more control over register @@ -125,8 +125,8 @@ #include "wimlib/lz_hash.h" #include "wimlib/unaligned.h" -#define HC_MATCHFINDER_HASH3_ORDER 14 -#define HC_MATCHFINDER_HASH4_ORDER 15 +#define HC_MATCHFINDER_HASH3_ORDER 15 +#define HC_MATCHFINDER_HASH4_ORDER 16 /* TEMPLATED functions and structures have MF_SUFFIX appended to their name. */ #undef TEMPLATED @@ -169,9 +169,9 @@ TEMPLATED(hc_matchfinder_init)(struct TEMPLATED(hc_matchfinder) *mf) * The matchfinder structure. * @in_begin * Pointer to the beginning of the input buffer. - * @cur_pos - * The current position in the input buffer (the position of the sequence - * being matched against). + * @in_next + * Pointer to the next position in the input buffer, i.e. the sequence + * being matched against. * @best_len * Require a match longer than this length. * @max_len @@ -192,25 +192,25 @@ TEMPLATED(hc_matchfinder_init)(struct TEMPLATED(hc_matchfinder) *mf) * 'best_len' was found. */ static forceinline u32 -TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const restrict mf, - const u8 * const restrict in_begin, - const ptrdiff_t cur_pos, +TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const mf, + const u8 * const in_begin, + const u8 * const in_next, u32 best_len, const u32 max_len, const u32 nice_len, const u32 max_search_depth, - u32 next_hashes[const restrict static 2], - u32 * const restrict offset_ret) + u32 * const next_hashes, + u32 * const offset_ret) { - const u8 *in_next = in_begin + cur_pos; u32 depth_remaining = max_search_depth; const u8 *best_matchptr = in_next; mf_pos_t cur_node3, cur_node4; u32 hash3, hash4; - u32 next_seq3, next_seq4; + u32 next_hashseq; u32 seq4; const u8 *matchptr; u32 len; + u32 cur_pos = in_next - in_begin; if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */ goto out; @@ -233,10 +233,9 @@ TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const mf->next_tab[cur_pos] = cur_node4; /* Compute the next hash codes. */ - next_seq4 = load_u32_unaligned(in_next + 1); - next_seq3 = loaded_u32_to_u24(next_seq4); - next_hashes[0] = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER); - next_hashes[1] = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER); + next_hashseq = get_unaligned_le32(in_next + 1); + next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER); + next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER); prefetchw(&mf->hash3_tab[next_hashes[0]]); prefetchw(&mf->hash4_tab[next_hashes[1]]); @@ -346,54 +345,49 @@ out: * The matchfinder structure. * @in_begin * Pointer to the beginning of the input buffer. - * @cur_pos - * The current position in the input buffer (the position of the sequence - * being matched against). - * @end_pos - * The length of the input buffer. + * @in_next + * Pointer to the next position in the input buffer. + * @in_end + * Pointer to the end of the input buffer. + * @count + * The number of bytes to advance. Must be > 0. * @next_hashes * The precomputed hash codes for the sequence beginning at @in_next. * These will be used and then updated with the precomputed hashcodes for * the sequence beginning at @in_next + @count. - * @count - * The number of bytes to advance. Must be > 0. - * - * Returns @in_next + @count. */ -static forceinline const u8 * -TEMPLATED(hc_matchfinder_skip_positions)(struct TEMPLATED(hc_matchfinder) * const restrict mf, - const u8 * const restrict in_begin, - const ptrdiff_t cur_pos, - const ptrdiff_t end_pos, - const u32 count, - u32 next_hashes[const restrict static 2]) +static forceinline void +TEMPLATED(hc_matchfinder_skip_bytes)(struct TEMPLATED(hc_matchfinder) * const mf, + const u8 * const in_begin, + const u8 *in_next, + const u8 * const in_end, + const u32 count, + u32 * const next_hashes) { - const u8 *in_next = in_begin + cur_pos; - const u8 * const stop_ptr = in_next + count; - - if (likely(count + 5 <= end_pos - cur_pos)) { - u32 hash3, hash4; - u32 next_seq3, next_seq4; - - hash3 = next_hashes[0]; - hash4 = next_hashes[1]; - do { - mf->hash3_tab[hash3] = in_next - in_begin; - mf->next_tab[in_next - in_begin] = mf->hash4_tab[hash4]; - mf->hash4_tab[hash4] = in_next - in_begin; - - next_seq4 = load_u32_unaligned(++in_next); - next_seq3 = loaded_u32_to_u24(next_seq4); - hash3 = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER); - hash4 = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER); - - } while (in_next != stop_ptr); - - prefetchw(&mf->hash3_tab[hash3]); - prefetchw(&mf->hash4_tab[hash4]); - next_hashes[0] = hash3; - next_hashes[1] = hash4; - } + u32 cur_pos; + u32 hash3, hash4; + u32 next_hashseq; + u32 remaining = count; + + if (unlikely(count + 5 > in_end - in_next)) + return; - return stop_ptr; + cur_pos = in_next - in_begin; + hash3 = next_hashes[0]; + hash4 = next_hashes[1]; + do { + mf->hash3_tab[hash3] = cur_pos; + mf->next_tab[cur_pos] = mf->hash4_tab[hash4]; + mf->hash4_tab[hash4] = cur_pos; + + next_hashseq = get_unaligned_le32(++in_next); + hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER); + hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER); + cur_pos++; + } while (--remaining); + + prefetchw(&mf->hash3_tab[hash3]); + prefetchw(&mf->hash4_tab[hash4]); + next_hashes[0] = hash3; + next_hashes[1] = hash4; } diff --git a/src/lzx_compress.c b/src/lzx_compress.c index 110db5dd..89031d4e 100644 --- a/src/lzx_compress.c +++ b/src/lzx_compress.c @@ -2569,7 +2569,7 @@ lzx_compress_lazy(struct lzx_compressor * restrict c, cur_len = CALL_HC_MF(is_16_bit, c, hc_matchfinder_longest_match, in_begin, - in_next - in_begin, + in_next, 2, max_len, nice_len, @@ -2646,7 +2646,7 @@ lzx_compress_lazy(struct lzx_compressor * restrict c, next_len = CALL_HC_MF(is_16_bit, c, hc_matchfinder_longest_match, in_begin, - in_next - in_begin, + in_next, cur_len - 2, max_len, nice_len, @@ -2707,13 +2707,14 @@ lzx_compress_lazy(struct lzx_compressor * restrict c, lzx_choose_match(c, cur_len, cur_adjusted_offset, recent_offsets, is_16_bit, &litrunlen, &next_seq); - in_next = CALL_HC_MF(is_16_bit, c, - hc_matchfinder_skip_positions, - in_begin, - in_next - in_begin, - in_end - in_begin, - skip_len, - next_hashes); + CALL_HC_MF(is_16_bit, c, + hc_matchfinder_skip_bytes, + in_begin, + in_next, + in_end, + skip_len, + next_hashes); + in_next += skip_len; /* Keep going until it's time to end the block. */ } while (in_next < in_max_block_end && diff --git a/src/xpress_compress.c b/src/xpress_compress.c index 1b430912..379388cd 100644 --- a/src/xpress_compress.c +++ b/src/xpress_compress.c @@ -544,7 +544,7 @@ xpress_compress_greedy(struct xpress_compressor * restrict c, length = hc_matchfinder_longest_match(&c->hc_mf, in_begin, - in_next - in_begin, + in_next, XPRESS_MIN_MATCH_LEN - 1, in_end - in_next, min(in_end - in_next, c->nice_match_length), @@ -558,12 +558,12 @@ xpress_compress_greedy(struct xpress_compressor * restrict c, *next_chosen_item++ = xpress_record_match(c, length, offset); in_next += 1; - hc_matchfinder_skip_positions(&c->hc_mf, - in_begin, - in_next - in_begin, - in_end - in_begin, - length - 1, - next_hashes); + hc_matchfinder_skip_bytes(&c->hc_mf, + in_begin, + in_next, + in_end, + length - 1, + next_hashes); in_next += length - 1; } else { /* No match found */ @@ -610,7 +610,7 @@ xpress_compress_lazy(struct xpress_compressor * restrict c, /* Find the longest match at the current position. */ cur_len = hc_matchfinder_longest_match(&c->hc_mf, in_begin, - in_next - in_begin, + in_next, XPRESS_MIN_MATCH_LEN - 1, in_end - in_next, min(in_end - in_next, c->nice_match_length), @@ -638,12 +638,12 @@ xpress_compress_lazy(struct xpress_compressor * restrict c, *next_chosen_item++ = xpress_record_match(c, cur_len, cur_offset); - hc_matchfinder_skip_positions(&c->hc_mf, - in_begin, - in_next - in_begin, - in_end - in_begin, - cur_len - 1, - next_hashes); + hc_matchfinder_skip_bytes(&c->hc_mf, + in_begin, + in_next, + in_end, + cur_len - 1, + next_hashes); in_next += cur_len - 1; continue; } @@ -666,7 +666,7 @@ xpress_compress_lazy(struct xpress_compressor * restrict c, */ next_len = hc_matchfinder_longest_match(&c->hc_mf, in_begin, - in_next - in_begin, + in_next, cur_len, in_end - in_next, min(in_end - in_next, c->nice_match_length), @@ -688,12 +688,12 @@ xpress_compress_lazy(struct xpress_compressor * restrict c, * output the current match. */ *next_chosen_item++ = xpress_record_match(c, cur_len, cur_offset); - hc_matchfinder_skip_positions(&c->hc_mf, - in_begin, - in_next - in_begin, - in_end - in_begin, - cur_len - 2, - next_hashes); + hc_matchfinder_skip_bytes(&c->hc_mf, + in_begin, + in_next, + in_end, + cur_len - 2, + next_hashes); in_next += cur_len - 2; continue; }