/*
 * lz_hash_chains.c
 *
 * Hash chain match-finder for Lempel-Ziv compression.
 *
 * Copyright (c) 2014 Eric Biggers.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "wimlib/lz_extend.h"
#include "wimlib/lz_hash3.h"
#include "wimlib/lz_mf.h"
#include "wimlib/util.h"

#include <string.h>	/* for memset() */
/* log2 of the number of buckets in the hash table.  This can be changed. */
#define LZ_HC_HASH_ORDER	15

#define LZ_HC_HASH_LEN		(1 << LZ_HC_HASH_ORDER)
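/*
 * Note on the data structure: 'hash_tab' has LZ_HC_HASH_LEN slots, each
 * holding the window position of the most recent occurrence of the
 * corresponding hash value.  It is followed in memory by 'prev_tab', which
 * maps each window position to the previous position with the same hash,
 * forming a singly linked chain per hash bucket.  Position 0 doubles as the
 * chain terminator, so a match beginning at position 0 can never be found;
 * this is a standard quirk of zero-initialized hash chain match-finders.
 */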
struct lz_hc {
	struct lz_mf base;
	u32 *hash_tab; /* followed by 'prev_tab' in memory */
	u32 next_hash;
};
static inline u32
lz_hc_hash(const u8 *p)
{
	return lz_hash(p, LZ_HC_HASH_ORDER);
}
static void
lz_hc_set_default_params(struct lz_mf_params *params)
{
	if (params->min_match_len < LZ_HASH_NBYTES)
		params->min_match_len = LZ_HASH_NBYTES;

	if (params->max_match_len == 0)
		params->max_match_len = params->max_window_size;

	if (params->max_search_depth == 0)
		params->max_search_depth = 50;

	if (params->nice_match_len == 0)
		params->nice_match_len = 24;

	if (params->nice_match_len < params->min_match_len)
		params->nice_match_len = params->min_match_len;

	if (params->nice_match_len > params->max_match_len)
		params->nice_match_len = params->max_match_len;
}
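/*
 * For example, an all-zero parameter block with a 1 MiB window resolves to:
 * min_match_len = LZ_HASH_NBYTES (3, given the 3-byte hash from lz_hash3.h),
 * max_match_len = 1 MiB, max_search_depth = 50, and nice_match_len = 24.
 */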
static bool
lz_hc_params_valid(const struct lz_mf_params *_params)
{
	struct lz_mf_params params = *_params;

	lz_hc_set_default_params(&params);

	return (params.min_match_len <= params.max_match_len);
}
static u64
lz_hc_get_needed_memory(u32 max_window_size)
{
	u64 len = 0;

	len += LZ_HC_HASH_LEN;
	len += max_window_size;

	return len * sizeof(u32);
}
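/*
 * That is, (LZ_HC_HASH_LEN + max_window_size) 32-bit entries: a fixed
 * 128 KiB for the 2^15 hash bucket heads, plus 4 bytes of chain-link storage
 * per window byte (e.g. 4 MiB of links for a 1 MiB window).
 */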
static bool
lz_hc_init(struct lz_mf *_mf)
{
	struct lz_hc *mf = (struct lz_hc *)_mf;

	lz_hc_set_default_params(&mf->base.params);

	/* Allocate the hash table and previous-position table as one block. */
	mf->hash_tab = MALLOC(lz_hc_get_needed_memory(mf->base.params.max_window_size));
	if (!mf->hash_tab)
		return false;

	return true;
}
static void
lz_hc_load_window(struct lz_mf *_mf, const u8 window[], u32 size)
{
	struct lz_hc *mf = (struct lz_hc *)_mf;

	memset(mf->hash_tab, 0, LZ_HC_HASH_LEN * sizeof(u32));
}
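/*
 * Only the bucket heads need to be cleared when a new window is loaded: each
 * 'prev_tab' entry is written before it can ever be read, because a position
 * only becomes reachable through a chain after it has been inserted.
 */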
static u32
lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
{
	struct lz_hc *mf = (struct lz_hc *)_mf;
	const u8 * const window = mf->base.cur_window;
	const u32 cur_pos = mf->base.cur_window_pos++;
	const u8 * const strptr = &window[cur_pos];
	const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
	u32 * const prev_tab = mf->hash_tab + LZ_HC_HASH_LEN;
	const u32 max_len = min(bytes_remaining, mf->base.params.max_match_len);
	const u32 nice_len = min(max_len, mf->base.params.nice_match_len);
	u32 best_len = mf->base.params.min_match_len - 1;
	u32 depth_remaining = mf->base.params.max_search_depth;
	struct lz_match *lz_matchptr = matches;
	u32 hash;
	u32 cur_match;
	if (unlikely(bytes_remaining < LZ_HASH_REQUIRED_NBYTES + 1))
		return 0;
	/* Insert the current position into the appropriate hash chain and set
	 * 'cur_match' to the previous head.
	 *
	 * For a slight performance improvement, we do each hash calculation one
	 * position in advance and prefetch the necessary hash table entry. */

	hash = mf->next_hash;
	mf->next_hash = lz_hc_hash(strptr + 1);
	prefetch(&mf->hash_tab[mf->next_hash]);
	cur_match = mf->hash_tab[hash];
	mf->hash_tab[hash] = cur_pos;
	prev_tab[cur_pos] = cur_match;
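	/*
	 * Worked example: if positions 10 and 25 already hashed to bucket 'h'
	 * (hash_tab[h] == 25, prev_tab[25] == 10) and the current position 40
	 * also hashes to 'h', then after the update above hash_tab[h] == 40
	 * and prev_tab[40] == 25, and the search below visits 25, then 10.
	 */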
	/* Ensure we can find a match of at least the requested length. */
	if (unlikely(best_len >= max_len))
		goto out;
	/* Search the appropriate hash chain for matches. */
	for (; cur_match && depth_remaining--; cur_match = prev_tab[cur_match]) {
		const u8 * const matchptr = &window[cur_match];
		u32 len;
		/* Considering the potential match at 'matchptr': is it longer
		 * than 'best_len'?
		 *
		 * The bytes at index 'best_len' are the most likely to differ,
		 * so check them first. */
		if (matchptr[best_len] != strptr[best_len])
			goto next_match;
	#if HAVE_FAST_LZ_EXTEND

		/* Compare the first 3 bytes using a single unaligned 32-bit
		 * load; the low 24 bits are the first 3 bytes on the
		 * little-endian layout this fast path assumes. */
		if ((*(const u32 *)strptr & 0xFFFFFF) !=
		    (*(const u32 *)matchptr & 0xFFFFFF))
			goto next_match;

		len = lz_extend(strptr, matchptr, 3, max_len);

		if (len > best_len) {
			best_len = len;

			*lz_matchptr++ = (struct lz_match) {
				.len = best_len,
				.offset = strptr - matchptr,
			};

			if (best_len >= nice_len)
				break;
		}
	#else /* HAVE_FAST_LZ_EXTEND */

		/* The bytes at indices 'best_len - 1' and '0' are less
		 * important to check separately.  But doing so still gives a
		 * slight performance improvement, at least on x86_64, probably
		 * because they create separate branches for the CPU to predict
		 * independently of the branches in the main comparison loops. */
		if (matchptr[best_len - 1] != strptr[best_len - 1] ||
		    matchptr[0] != strptr[0])
			goto next_match;

		for (len = 1; len < best_len - 1; len++)
			if (matchptr[len] != strptr[len])
				goto next_match;

		/* The match is the longest found so far, i.e. at least
		 * 'best_len' + 1 bytes.  Continue extending it. */

		if (++best_len != max_len && strptr[best_len] == matchptr[best_len])
			while (++best_len != max_len)
				if (strptr[best_len] != matchptr[best_len])
					break;

		/* Record the match. */
		*lz_matchptr++ = (struct lz_match) {
			.len = best_len,
			.offset = strptr - matchptr,
		};

		/* Terminate the search if 'nice_len' was reached. */
		if (best_len >= nice_len)
			break;

	#endif /* !HAVE_FAST_LZ_EXTEND */
	next_match:
		/* Continue to next match in the chain. */
		;
	}

out:
	return lz_matchptr - matches;
}
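/*
 * Note that matches are emitted in strictly increasing length order, since a
 * match is only recorded when it beats 'best_len'.  Callers can therefore
 * treat matches[num_matches - 1] as the longest match found at this position.
 */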
static void
lz_hc_skip_positions(struct lz_mf *_mf, u32 n)
{
	struct lz_hc *mf = (struct lz_hc *)_mf;
	u32 * const hash_tab = mf->hash_tab;
	u32 * const prev_tab = hash_tab + LZ_HC_HASH_LEN;
	const u8 * const window = mf->base.cur_window;
	u32 cur_pos = mf->base.cur_window_pos;
	u32 end_pos = cur_pos + n;
	const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
	u32 hash;
	u32 next_hash;
	mf->base.cur_window_pos = end_pos;

	if (unlikely(bytes_remaining < n + (LZ_HASH_REQUIRED_NBYTES + 1) - 1)) {
		/* Nearing end of window. */
		if (unlikely(bytes_remaining < (LZ_HASH_REQUIRED_NBYTES + 1)))
			return;

		end_pos = cur_pos + bytes_remaining - (LZ_HASH_REQUIRED_NBYTES + 1) + 1;
	}
	next_hash = mf->next_hash;
	do {
		hash = next_hash;
		next_hash = lz_hc_hash(&window[cur_pos + 1]);
		prev_tab[cur_pos] = hash_tab[hash];
		hash_tab[hash] = cur_pos;
	} while (++cur_pos != end_pos);

	prefetch(&hash_tab[next_hash]);
	mf->next_hash = next_hash;
}
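/*
 * Skipped positions are still inserted into their hash chains; they are
 * merely not searched.  This keeps the chains complete, so that later calls
 * to lz_hc_get_matches() can still find matches beginning inside the skipped
 * region.
 */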
static void
lz_hc_destroy(struct lz_mf *_mf)
{
	struct lz_hc *mf = (struct lz_hc *)_mf;

	FREE(mf->hash_tab);
}
const struct lz_mf_ops lz_hash_chains_ops = {
	.params_valid = lz_hc_params_valid,
	.get_needed_memory = lz_hc_get_needed_memory,
	.init = lz_hc_init,
	.load_window = lz_hc_load_window,
	.get_matches = lz_hc_get_matches,
	.skip_positions = lz_hc_skip_positions,
	.destroy = lz_hc_destroy,
	.struct_size = sizeof(struct lz_hc),
};
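/*
 * A rough usage sketch of this match-finder through the generic lz_mf layer.
 * This is illustrative only: the lz_mf_*() wrapper names, the 'algorithm'
 * field, and LZ_MF_HASH_CHAINS are assumed from wimlib/lz_mf.h and may
 * differ by version.  The matches[] array must be large enough for the
 * maximum number of matches per position (at most max_search_depth).
 *
 *	struct lz_mf_params params = {
 *		.algorithm = LZ_MF_HASH_CHAINS,
 *		.max_window_size = window_size,
 *	};
 *	struct lz_mf *mf = lz_mf_alloc(&params);
 *	struct lz_match matches[64];
 *
 *	lz_mf_load_window(mf, window, window_size);
 *	while (lz_mf_get_position(mf) < window_size) {
 *		u32 num = lz_mf_get_matches(mf, matches);
 *		if (num > 0) {
 *			// Longest match is last; emit it, then skip the
 *			// rest of its bytes (get_matches() advanced by one).
 *			lz_mf_skip_positions(mf, matches[num - 1].len - 1);
 *		} else {
 *			// No match: emit a literal byte.
 *		}
 *	}
 *	lz_mf_free(mf);
 */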