/*
 * matchfinder_common.h - common code for Lempel-Ziv matchfinding
 *
 * Copyright 2022 Eric Biggers
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#ifndef _WIMLIB_MATCHFINDER_COMMON_H
#define _WIMLIB_MATCHFINDER_COMMON_H

#include "wimlib/bitops.h"
#include "wimlib/unaligned.h"
35 * Given a 32-bit value that was loaded with the platform's native endianness,
36 * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
37 * bits contain the first 3 bytes, arranged in octets in a platform-dependent
38 * order, at the memory location from which the input 32-bit value was loaded.
40 static forceinline u32
41 loaded_u32_to_u24(u32 v)
43 if (CPU_IS_LITTLE_ENDIAN())
50 * Load the next 3 bytes from @p into the 24 low-order bits of a 32-bit value.
51 * The order in which the 3 bytes will be arranged as octets in the 24 bits is
52 * platform-dependent. At least 4 bytes (not 3) must be available at @p.
54 static forceinline u32
55 load_u24_unaligned(const u8 *p)
57 #if UNALIGNED_ACCESS_IS_FAST
58 return loaded_u32_to_u24(load_u32_unaligned(p));
60 if (CPU_IS_LITTLE_ENDIAN())
61 return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
63 return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
68 * The hash function: given a sequence prefix held in the low-order bits of a
69 * 32-bit value, multiply by a carefully-chosen large constant. Discard any
70 * bits of the product that don't fit in a 32-bit value, but take the
71 * next-highest @num_bits bits of the product as the hash value, as those have
72 * the most randomness.
74 static forceinline u32
75 lz_hash(u32 seq, unsigned num_bits)
77 return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
81 * Return the number of bytes at @matchptr that match the bytes at @strptr, up
82 * to a maximum of @max_len. Initially, @start_len bytes are matched.
84 static forceinline unsigned
85 lz_extend(const u8 * const strptr, const u8 * const matchptr,
86 const unsigned start_len, const unsigned max_len)
88 unsigned len = start_len;
89 machine_word_t v_word;
91 if (UNALIGNED_ACCESS_IS_FAST) {
93 if (likely(max_len - len >= 4 * WORDBYTES)) {
95 #define COMPARE_WORD_STEP \
96 v_word = load_word_unaligned(&matchptr[len]) ^ \
97 load_word_unaligned(&strptr[len]); \
106 #undef COMPARE_WORD_STEP
109 while (len + WORDBYTES <= max_len) {
110 v_word = load_word_unaligned(&matchptr[len]) ^
111 load_word_unaligned(&strptr[len]);
118 while (len < max_len && matchptr[len] == strptr[len])
123 if (CPU_IS_LITTLE_ENDIAN())
124 len += (bsfw(v_word) >> 3);
126 len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
#endif /* _WIMLIB_MATCHFINDER_COMMON_H */