7 * The author dedicates this file to the public domain.
8 * You can do whatever you want with this file.
10 * ---------------------------------------------------------------------------
14 * This is a Hash Chains (hc) based matchfinder.
16 * The data structure is a hash table where each hash bucket contains a linked
17 * list (or "chain") of sequences whose first 3 bytes share the same hash code.
18 * Each sequence is identified by its starting position in the input buffer.
20 * The algorithm processes the input buffer sequentially. At each byte
21 * position, the hash code of the first 3 bytes of the sequence beginning at
22 * that position (the sequence being matched against) is computed. This
23 * identifies the hash bucket to use for that position. Then, this hash
24 * bucket's linked list is searched for matches. Then, a new linked list node
25 * is created to represent the current sequence and is prepended to the list.
27 * This algorithm has several useful properties:
29 * - It only finds true Lempel-Ziv matches; i.e., those where the matching
30 * sequence occurs prior to the sequence being matched against.
32 * - The sequences in each linked list are always sorted by decreasing starting
33 * position. Therefore, the closest (smallest offset) matches are found
34 * first, which in many compression formats tend to be the cheapest to encode.
36 * - Although fast running time is not guaranteed due to the possibility of the
37 * lists getting very long, the worst degenerate behavior can be easily
38 * prevented by capping the number of nodes searched at each position.
40 * - If the compressor decides not to search for matches at a certain position,
41 * then that position can be quickly inserted without searching the list.
43 * - The algorithm is adaptable to sliding windows: just store the positions
44 * relative to a "base" value that is updated from time to time, and stop
45 * searching each list when the sequences get too far away.
47 * ---------------------------------------------------------------------------
51 * You must define MATCHFINDER_MAX_WINDOW_ORDER before including this header
52 * because that determines which integer type to use for positions. Since
53 * 16-bit integers are faster than 32-bit integers due to reduced memory usage
54 * (and therefore reduced cache pressure), the code only uses 32-bit integers if
55 * they are needed to represent all possible positions.
57 * The number of bytes that must be allocated for a given 'struct
58 * hc_matchfinder' must be gotten by calling hc_matchfinder_size().
60 * ----------------------------------------------------------------------------
64 * The longest_match() and skip_positions() functions are inlined into the
65 * compressors that use them. This isn't just about saving the overhead of a
66 * function call. These functions are intended to be called from the inner
67 * loops of compressors, where giving the compiler more control over register
68 * allocation is very helpful. There is also significant benefit to be gained
69 * from allowing the CPU to predict branches independently at each call site.
70 * For example, "lazy"-style compressors can be written with two calls to
71 * longest_match(), each of which starts with a different 'best_len' and
72 * therefore has significantly different performance characteristics.
74 * Although any hash function can be used, a multiplicative hash is fast and
77 * On some processors, it is significantly faster to extend matches by whole
78 * words (32 or 64 bits) instead of by individual bytes. For this to be the
79 * case, the processor must implement unaligned memory accesses efficiently and
80 * must have either a fast "find first set bit" instruction or a fast "find last
81 * set bit" instruction, depending on the processor's endianness.
83 * The code uses one loop for finding the first match and one loop for finding a
84 * longer match. Each of these loops is tuned for its respective task and in
85 * combination are faster than a single generalized loop that handles both
88 * The code also uses a tight inner loop that only compares the last and first
89 * bytes of a potential match. It is only when these bytes match that a full
90 * match extension is attempted.
92 * ----------------------------------------------------------------------------
95 #ifndef _HC_MATCHFINDER_H
96 #define _HC_MATCHFINDER_H
98 #ifndef MATCHFINDER_MAX_WINDOW_ORDER
99 # error "MATCHFINDER_MAX_WINDOW_ORDER must be defined!"
104 #include "wimlib/lz_extend.h"
105 #include "wimlib/lz_hash.h"
106 #include "wimlib/unaligned.h"
108 #if MATCHFINDER_MAX_WINDOW_ORDER < 14
109 # define HC_MATCHFINDER_HASH_ORDER 14
111 # define HC_MATCHFINDER_HASH_ORDER 15
114 #if MATCHFINDER_MAX_WINDOW_ORDER <= 16
120 struct hc_matchfinder {
121 pos_t hash_tab[1UL << HC_MATCHFINDER_HASH_ORDER];
125 /* Return the number of bytes that must be allocated for a 'hc_matchfinder' that
126 * can work with buffers up to the specified size. */
128 hc_matchfinder_size(size_t max_bufsize)
130 return sizeof(struct hc_matchfinder) + (max_bufsize * sizeof(pos_t));
133 /* Prepare the matchfinder for a new input buffer. */
135 hc_matchfinder_init(struct hc_matchfinder *mf)
137 memset(mf, 0, sizeof(*mf));
141 * Find the longest match longer than 'best_len' bytes.
144 * The matchfinder structure.
146 * Pointer to the beginning of the input buffer.
148 * Pointer to the next byte in the input buffer to process. This is the
149 * pointer to the sequence being matched against.
151 * Require a match longer than this length.
153 * The maximum permissible match length at this position.
155 * Stop searching if a match of at least this length is found.
156 * Must be <= @max_len.
158 * Limit on the number of potential matches to consider. Must be >= 1.
160 * If a match is found, its offset is returned in this location.
162 * Return the length of the match found, or 'best_len' if no match longer than
163 * 'best_len' was found.
165 static inline unsigned
166 hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
167 const u8 * const in_begin,
168 const u8 * const in_next,
170 const unsigned max_len,
171 const unsigned nice_len,
172 const unsigned max_search_depth,
173 unsigned *offset_ret)
175 unsigned depth_remaining = max_search_depth;
176 const u8 *best_matchptr = best_matchptr; /* uninitialized */
183 /* Insert the current sequence into the appropriate linked list. */
184 if (unlikely(max_len < LOAD_U24_REQUIRED_NBYTES))
186 first_3_bytes = load_u24_unaligned(in_next);
187 hash = lz_hash(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
188 cur_node = mf->hash_tab[hash];
189 mf->next_tab[in_next - in_begin] = cur_node;
190 mf->hash_tab[hash] = in_next - in_begin;
192 if (unlikely(best_len >= max_len))
195 /* Search the appropriate linked list for matches. */
202 /* No length 3 match found yet.
203 * Check the first 3 bytes. */
204 matchptr = &in_begin[cur_node];
206 if (load_u24_unaligned(matchptr) == first_3_bytes)
209 /* The first 3 bytes did not match. Keep trying. */
210 cur_node = mf->next_tab[cur_node];
211 if (!cur_node || !--depth_remaining)
215 /* Found a match of length >= 3. Extend it to its full length. */
216 best_matchptr = matchptr;
217 best_len = lz_extend(in_next, best_matchptr, 3, max_len);
218 if (best_len >= nice_len)
220 cur_node = mf->next_tab[cur_node];
221 if (!cur_node || !--depth_remaining)
227 matchptr = &in_begin[cur_node];
229 /* Already found a length 3 match. Try for a longer
230 * match; start by checking either the last 4 bytes and
231 * the first 4 bytes, or the last byte. (The last byte,
232 * the one which would extend the match length by 1, is
233 * the most important.) */
234 #if UNALIGNED_ACCESS_IS_FAST
235 if ((load_u32_unaligned(matchptr + best_len - 3) ==
236 load_u32_unaligned(in_next + best_len - 3)) &&
237 (load_u32_unaligned(matchptr) ==
238 load_u32_unaligned(in_next)))
240 if (matchptr[best_len] == in_next[best_len])
244 cur_node = mf->next_tab[cur_node];
245 if (!cur_node || !--depth_remaining)
249 #if UNALIGNED_ACCESS_IS_FAST
254 len = lz_extend(in_next, matchptr, len, max_len);
255 if (len > best_len) {
257 best_matchptr = matchptr;
258 if (best_len >= nice_len)
261 cur_node = mf->next_tab[cur_node];
262 if (!cur_node || !--depth_remaining)
266 *offset_ret = in_next - best_matchptr;
271 * Advance the matchfinder, but don't search for matches.
274 * The matchfinder structure.
276 * Pointer to the beginning of the input buffer.
278 * Pointer to the next byte in the input buffer to process.
280 * Pointer to the end of the input buffer.
282 * The number of bytes to advance. Must be > 0.
285 hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
293 if (unlikely(in_next + count >= in_end - LZ_HASH3_REQUIRED_NBYTES))
297 hash = lz_hash_3_bytes(in_next, HC_MATCHFINDER_HASH_ORDER);
298 mf->next_tab[in_next - in_begin] = mf->hash_tab[hash];
299 mf->hash_tab[hash] = in_next - in_begin;
304 #endif /* _HC_MATCHFINDER_H */