X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=include%2Fwimlib%2Fhc_matchfinder.h;h=2c28a64633c54b93abe883a157b7728e0a92313c;hb=7976ce465b28b86b4cee954a8116aae435058186;hp=1f552db2e73bcccd2550a747720938343cf9fc52;hpb=eb3e3b72db23ecaa7789a807afeb9577962653fe;p=wimlib diff --git a/include/wimlib/hc_matchfinder.h b/include/wimlib/hc_matchfinder.h index 1f552db2..2c28a646 100644 --- a/include/wimlib/hc_matchfinder.h +++ b/include/wimlib/hc_matchfinder.h @@ -1,21 +1,28 @@ /* * hc_matchfinder.h - Lempel-Ziv matchfinding with a hash table of linked lists * - * The following copying information applies to this specific source code file: - * - * Written in 2014-2015 by Eric Biggers - * - * To the extent possible under law, the author(s) have dedicated all copyright - * and related and neighboring rights to this software to the public domain - * worldwide via the Creative Commons Zero 1.0 Universal Public Domain - * Dedication (the "CC0"). - * - * This software is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the CC0 for more details. - * - * You should have received a copy of the CC0 along with this software; if not - * see . + * Copyright 2022 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. * * --------------------------------------------------------------------------- * @@ -81,7 +88,7 @@ * chain for length 3+ matches, the algorithm just checks for one close length 3 * match, then focuses on finding length 4+ matches. * - * The longest_match() and skip_positions() functions are inlined into the + * The longest_match() and skip_bytes() functions are inlined into the * compressors that use them. This isn't just about saving the overhead of a * function call. These functions are intended to be called from the inner * loops of compressors, where giving the compiler more control over register @@ -118,8 +125,8 @@ #include "wimlib/lz_hash.h" #include "wimlib/unaligned.h" -#define HC_MATCHFINDER_HASH3_ORDER 14 -#define HC_MATCHFINDER_HASH4_ORDER 15 +#define HC_MATCHFINDER_HASH3_ORDER 15 +#define HC_MATCHFINDER_HASH4_ORDER 16 /* TEMPLATED functions and structures have MF_SUFFIX appended to their name. */ #undef TEMPLATED @@ -141,7 +148,7 @@ struct TEMPLATED(hc_matchfinder) { /* Return the number of bytes that must be allocated for a 'hc_matchfinder' that * can work with buffers up to the specified size. */ -static inline size_t +static forceinline size_t TEMPLATED(hc_matchfinder_size)(size_t max_bufsize) { return sizeof(struct TEMPLATED(hc_matchfinder)) + @@ -149,7 +156,7 @@ TEMPLATED(hc_matchfinder_size)(size_t max_bufsize) } /* Prepare the matchfinder for a new input buffer. */ -static inline void +static forceinline void TEMPLATED(hc_matchfinder_init)(struct TEMPLATED(hc_matchfinder) *mf) { memset(mf, 0, sizeof(*mf)); @@ -162,9 +169,9 @@ TEMPLATED(hc_matchfinder_init)(struct TEMPLATED(hc_matchfinder) *mf) * The matchfinder structure. * @in_begin * Pointer to the beginning of the input buffer. - * @cur_pos - * The current position in the input buffer (the position of the sequence - * being matched against). + * @in_next + * Pointer to the next position in the input buffer, i.e. the sequence + * being matched against. * @best_len * Require a match longer than this length. * @max_len @@ -184,26 +191,26 @@ TEMPLATED(hc_matchfinder_init)(struct TEMPLATED(hc_matchfinder) *mf) * Return the length of the match found, or 'best_len' if no match longer than * 'best_len' was found. */ -static inline u32 -TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const restrict mf, - const u8 * const restrict in_begin, - const ptrdiff_t cur_pos, +static forceinline u32 +TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const mf, + const u8 * const in_begin, + const u8 * const in_next, u32 best_len, const u32 max_len, const u32 nice_len, const u32 max_search_depth, - u32 next_hashes[const restrict static 2], - u32 * const restrict offset_ret) + u32 * const next_hashes, + u32 * const offset_ret) { - const u8 *in_next = in_begin + cur_pos; u32 depth_remaining = max_search_depth; - const u8 *best_matchptr = best_matchptr; /* uninitialized */ + const u8 *best_matchptr = in_next; mf_pos_t cur_node3, cur_node4; u32 hash3, hash4; - u32 next_seq3, next_seq4; + u32 next_hashseq; u32 seq4; const u8 *matchptr; u32 len; + u32 cur_pos = in_next - in_begin; if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */ goto out; @@ -226,10 +233,9 @@ TEMPLATED(hc_matchfinder_longest_match)(struct TEMPLATED(hc_matchfinder) * const mf->next_tab[cur_pos] = cur_node4; /* Compute the next hash codes. */ - next_seq4 = load_u32_unaligned(in_next + 1); - next_seq3 = loaded_u32_to_u24(next_seq4); - next_hashes[0] = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER); - next_hashes[1] = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER); + next_hashseq = get_unaligned_le32(in_next + 1); + next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER); + next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER); prefetchw(&mf->hash3_tab[next_hashes[0]]); prefetchw(&mf->hash4_tab[next_hashes[1]]); @@ -339,54 +345,49 @@ out: * The matchfinder structure. * @in_begin * Pointer to the beginning of the input buffer. - * @cur_pos - * The current position in the input buffer (the position of the sequence - * being matched against). - * @end_pos - * The length of the input buffer. + * @in_next + * Pointer to the next position in the input buffer. + * @in_end + * Pointer to the end of the input buffer. + * @count + * The number of bytes to advance. Must be > 0. * @next_hashes * The precomputed hash codes for the sequence beginning at @in_next. * These will be used and then updated with the precomputed hashcodes for * the sequence beginning at @in_next + @count. - * @count - * The number of bytes to advance. Must be > 0. - * - * Returns @in_next + @count. */ -static inline const u8 * -TEMPLATED(hc_matchfinder_skip_positions)(struct TEMPLATED(hc_matchfinder) * const restrict mf, - const u8 * const restrict in_begin, - const ptrdiff_t cur_pos, - const ptrdiff_t end_pos, - const u32 count, - u32 next_hashes[const restrict static 2]) +static forceinline void +TEMPLATED(hc_matchfinder_skip_bytes)(struct TEMPLATED(hc_matchfinder) * const mf, + const u8 * const in_begin, + const u8 *in_next, + const u8 * const in_end, + const u32 count, + u32 * const next_hashes) { - const u8 *in_next = in_begin + cur_pos; - const u8 * const stop_ptr = in_next + count; - - if (likely(count + 5 <= end_pos - cur_pos)) { - u32 hash3, hash4; - u32 next_seq3, next_seq4; - - hash3 = next_hashes[0]; - hash4 = next_hashes[1]; - do { - mf->hash3_tab[hash3] = in_next - in_begin; - mf->next_tab[in_next - in_begin] = mf->hash4_tab[hash4]; - mf->hash4_tab[hash4] = in_next - in_begin; - - next_seq4 = load_u32_unaligned(++in_next); - next_seq3 = loaded_u32_to_u24(next_seq4); - hash3 = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER); - hash4 = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER); - - } while (in_next != stop_ptr); - - prefetchw(&mf->hash3_tab[hash3]); - prefetchw(&mf->hash4_tab[hash4]); - next_hashes[0] = hash3; - next_hashes[1] = hash4; - } + u32 cur_pos; + u32 hash3, hash4; + u32 next_hashseq; + u32 remaining = count; - return stop_ptr; + if (unlikely(count + 5 > in_end - in_next)) + return; + + cur_pos = in_next - in_begin; + hash3 = next_hashes[0]; + hash4 = next_hashes[1]; + do { + mf->hash3_tab[hash3] = cur_pos; + mf->next_tab[cur_pos] = mf->hash4_tab[hash4]; + mf->hash4_tab[hash4] = cur_pos; + + next_hashseq = get_unaligned_le32(++in_next); + hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER); + hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER); + cur_pos++; + } while (--remaining); + + prefetchw(&mf->hash3_tab[hash3]); + prefetchw(&mf->hash4_tab[hash4]); + next_hashes[0] = hash3; + next_hashes[1] = hash4; }