7 * The author dedicates this file to the public domain.
8 * You can do whatever you want with this file.
10 * ---------------------------------------------------------------------------
14 * This is a Hash Chains (hc) based matchfinder.
16 * The data structure is a hash table where each hash bucket contains a linked
17 * list (or "chain") of sequences whose first 3 bytes share the same hash code.
18 * Each sequence is identified by its starting position in the input buffer.
20 * The algorithm processes the input buffer sequentially. At each byte
21 * position, the hash code of the first 3 bytes of the sequence beginning at
22 * that position (the sequence being matched against) is computed. This
23 * identifies the hash bucket to use for that position. Then, this hash
24 * bucket's linked list is searched for matches. Then, a new linked list node
25 * is created to represent the current sequence and is prepended to the list.
27 * This algorithm has several useful properties:
29 * - It only finds true Lempel-Ziv matches; i.e., those where the matching
30 * sequence occurs prior to the sequence being matched against.
32 * - The sequences in each linked list are always sorted by decreasing starting
33 * position. Therefore, the closest (smallest offset) matches are found
34 * first, which in many compression formats tend to be the cheapest to encode.
36 * - Although fast running time is not guaranteed due to the possibility of the
37 * lists getting very long, the worst degenerate behavior can be easily
38 * prevented by capping the number of nodes searched at each position.
40 * - If the compressor decides not to search for matches at a certain position,
41 * then that position can be quickly inserted without searching the list.
43 * - The algorithm is adaptable to sliding windows: just store the positions
44 * relative to a "base" value that is updated from time to time, and stop
45 * searching each list when the sequences get too far away.
47 * ---------------------------------------------------------------------------
51 * You must define MATCHFINDER_MAX_WINDOW_ORDER before including this header
52 * because that determines which integer type to use for positions. Since
53 * 16-bit integers are faster than 32-bit integers due to reduced memory usage
54 * (and therefore reduced cache pressure), the code only uses 32-bit integers if
55 * they are needed to represent all possible positions.
57 * You must allocate the 'struct hc_matchfinder' on a
58 * MATCHFINDER_ALIGNMENT-aligned boundary, and its necessary allocation size
59 * must be gotten by calling hc_matchfinder_size().
61 * ----------------------------------------------------------------------------
65 * The longest_match() and skip_positions() functions are inlined into the
66 * compressors that use them. This isn't just about saving the overhead of a
67 * function call. These functions are intended to be called from the inner
68 * loops of compressors, where giving the compiler more control over register
69 * allocation is very helpful. There is also significant benefit to be gained
70 * from allowing the CPU to predict branches independently at each call site.
71 * For example, "lazy"-style compressors can be written with two calls to
72 * longest_match(), each of which starts with a different 'best_len' and
73 * therefore has significantly different performance characteristics.
75 * Although any hash function can be used, a multiplicative hash is fast and
78 * On some processors, it is significantly faster to extend matches by whole
79 * words (32 or 64 bits) instead of by individual bytes. For this to be the
80 * case, the processor must implement unaligned memory accesses efficiently and
81 * must have either a fast "find first set bit" instruction or a fast "find last
82 * set bit" instruction, depending on the processor's endianness.
84 * The code uses one loop for finding the first match and one loop for finding a
85 * longer match. Each of these loops is tuned for its respective task and in
86 * combination are faster than a single generalized loop that handles both
89 * The code also uses a tight inner loop that only compares the last and first
90 * bytes of a potential match. It is only when these bytes match that a full
91 * match extension is attempted.
93 * ----------------------------------------------------------------------------
96 #ifndef _HC_MATCHFINDER_H
97 #define _HC_MATCHFINDER_H
99 #include "wimlib/lz_extend.h"
100 #include "wimlib/lz_hash.h"
101 #include "wimlib/matchfinder_common.h"
102 #include "wimlib/unaligned.h"
104 #if MATCHFINDER_MAX_WINDOW_ORDER < 14
105 # define HC_MATCHFINDER_HASH_ORDER 14
107 # define HC_MATCHFINDER_HASH_ORDER 15
110 #define HC_MATCHFINDER_HASH_LENGTH (1UL << HC_MATCHFINDER_HASH_ORDER)
112 struct hc_matchfinder {
113 pos_t hash_tab[HC_MATCHFINDER_HASH_LENGTH];
115 } _aligned_attribute(MATCHFINDER_ALIGNMENT);
117 /* Return the number of bytes that must be allocated for a 'hc_matchfinder' that
118 * can work with buffers up to the specified size. */
120 hc_matchfinder_size(size_t max_bufsize)
122 return sizeof(pos_t) * (HC_MATCHFINDER_HASH_LENGTH + max_bufsize);
125 /* Prepare the matchfinder for a new input buffer. */
127 hc_matchfinder_init(struct hc_matchfinder *mf)
129 matchfinder_init(mf->hash_tab, HC_MATCHFINDER_HASH_LENGTH);
133 * Find the longest match longer than 'best_len' bytes.
136 * The matchfinder structure.
138 * Pointer to the beginning of the input buffer.
140 * Pointer to the next byte in the input buffer to process. This is the
141 * pointer to the sequence being matched against.
143 * Require a match longer than this length.
145 * The maximum permissible match length at this position.
147 * Stop searching if a match of at least this length is found.
148 * Must be <= @max_len.
150 * Limit on the number of potential matches to consider. Must be >= 1.
152 * If a match is found, its offset is returned in this location.
154 * Return the length of the match found, or 'best_len' if no match longer than
155 * 'best_len' was found.
157 static inline unsigned
158 hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
159 const u8 * const in_begin,
160 const u8 * const in_next,
162 const unsigned max_len,
163 const unsigned nice_len,
164 const unsigned max_search_depth,
165 unsigned *offset_ret)
167 unsigned depth_remaining = max_search_depth;
168 const u8 *best_matchptr = best_matchptr; /* uninitialized */
175 /* Insert the current sequence into the appropriate linked list. */
176 if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES))
178 first_3_bytes = load_u24_unaligned(in_next);
179 hash = lz_hash(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
180 cur_node = mf->hash_tab[hash];
181 mf->next_tab[in_next - in_begin] = cur_node;
182 mf->hash_tab[hash] = in_next - in_begin;
184 if (unlikely(best_len >= max_len))
187 /* Search the appropriate linked list for matches. */
189 if (!(matchfinder_node_valid(cur_node)))
194 /* No length 3 match found yet.
195 * Check the first 3 bytes. */
196 matchptr = &in_begin[cur_node];
198 if (load_u24_unaligned(matchptr) == first_3_bytes)
201 /* The first 3 bytes did not match. Keep trying. */
202 cur_node = mf->next_tab[cur_node];
203 if (!matchfinder_node_valid(cur_node) || !--depth_remaining)
207 /* Found a match of length >= 3. Extend it to its full length. */
208 best_matchptr = matchptr;
209 best_len = lz_extend(in_next, best_matchptr, 3, max_len);
210 if (best_len >= nice_len)
212 cur_node = mf->next_tab[cur_node];
213 if (!matchfinder_node_valid(cur_node) || !--depth_remaining)
219 matchptr = &in_begin[cur_node];
221 /* Already found a length 3 match. Try for a longer match;
222 * start by checking the last 2 bytes and the first 4 bytes. */
223 #if UNALIGNED_ACCESS_IS_FAST
224 if ((load_u32_unaligned(matchptr + best_len - 3) ==
225 load_u32_unaligned(in_next + best_len - 3)) &&
226 (load_u32_unaligned(matchptr) ==
227 load_u32_unaligned(in_next)))
229 if (matchptr[best_len] == in_next[best_len])
233 cur_node = mf->next_tab[cur_node];
234 if (!matchfinder_node_valid(cur_node) || !--depth_remaining)
238 #if UNALIGNED_ACCESS_IS_FAST
243 len = lz_extend(in_next, matchptr, len, max_len);
244 if (len > best_len) {
246 best_matchptr = matchptr;
247 if (best_len >= nice_len)
250 cur_node = mf->next_tab[cur_node];
251 if (!matchfinder_node_valid(cur_node) || !--depth_remaining)
255 *offset_ret = in_next - best_matchptr;
260 * Advance the matchfinder, but don't search for matches.
263 * The matchfinder structure.
265 * Pointer to the beginning of the input buffer.
267 * Pointer to the next byte in the input buffer to process.
269 * Pointer to the end of the input buffer.
271 * The number of bytes to advance. Must be > 0.
274 hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
282 if (unlikely(in_next + count >= in_end - LZ_HASH_REQUIRED_NBYTES))
286 hash = lz_hash_3_bytes(in_next, HC_MATCHFINDER_HASH_ORDER);
287 mf->next_tab[in_next - in_begin] = mf->hash_tab[hash];
288 mf->hash_tab[hash] = in_next - in_begin;
293 #endif /* _HC_MATCHFINDER_H */