4 * Hash chain match-finder for Lempel-Ziv compression.
6 * Copyright (c) 2014 Eric Biggers. All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include "wimlib/lz_mf.h"
37 #include "wimlib/util.h"
41 /* Number of hash buckets. This can be changed, but should be a power of 2 so
42 * that the correct hash bucket can be selected using a fast bitwise AND. */
43 #define LZ_HC_HASH_LEN (1 << 15)
45 /* Number of bytes from which the hash code is computed at each position. This
46 * can be changed, provided that lz_hc_hash() is updated as well. */
47 #define LZ_HC_HASH_BYTES 3
/* Lookup table holding the CRC-32 of every possible byte value; used by
 * lz_hc_hash() to mix the first of the three hashed bytes. Filled at most
 * once, thread-safely, via pthread_once() (see lz_hc_init()). */
56 static u32 crc32_table[256];
/* One-time-init guard for crc32_table. */
57 static pthread_once_t crc32_table_filled = PTHREAD_ONCE_INIT;
/* Body of crc32_init(): build the byte-wise CRC-32 table using the reflected
 * polynomial 0xEDB88320 (standard CRC-32 as used in zlib/Ethernet).
 * NOTE(review): this excerpt elides several lines — presumably each outer
 * iteration seeds 'r' from 'b', the inner branch applies the polynomial only
 * when the low bit is set (plain shift otherwise), and the result is stored
 * into crc32_table[b]. Confirm against the full source. */
62 for (u32 b = 0; b < 256; b++) {
/* Process the 8 bits of the byte value, LSB first. */
64 for (int i = 0; i < 8; i++) {
/* Reflected CRC-32 step: shift right, then XOR in the polynomial. */
66 r = (r >> 1) ^ 0xEDB88320;
74 /* This hash function is taken from the LZMA SDK. It seems to work well.
76 * TODO: Maybe use the SSE4.2 CRC32 instruction when available? */
/* Compute the hash bucket index for the LZ_HC_HASH_BYTES (3) bytes at 'p'.
 * NOTE(review): the return-type line and the initialization of 'hash' (and
 * the mixing of p[1]) are elided from this excerpt — confirm against the
 * full source. */
78 lz_hc_hash(const u8 *p)
/* First byte is diffused through the CRC-32 table for good distribution. */
82 hash ^= crc32_table[p[0]];
/* Third byte contributes at bit position 8. */
84 hash ^= (u32)p[2] << 8;
/* Reduce to a valid bucket index. LZ_HC_HASH_LEN is a power of 2 (see its
 * definition), so the compiler can implement this '%' as a bitwise AND. */
86 return hash % LZ_HC_HASH_LEN;
/* Fill in any unspecified (zero) match-finder parameters with defaults, and
 * clamp the rest into a self-consistent range. Mutates '*params' in place. */
90 lz_hc_set_default_params(struct lz_mf_params *params)
/* Matches shorter than the hash length cannot be found by this finder. */
92 if (params->min_match_len < LZ_HC_HASH_BYTES)
93 params->min_match_len = LZ_HC_HASH_BYTES;
/* 0 means "no explicit limit": allow matches up to the whole window. */
95 if (params->max_match_len == 0)
96 params->max_match_len = params->max_window_size;
/* 0 means "use default search depth" (hash-chain traversal limit). */
98 if (params->max_search_depth == 0)
99 params->max_search_depth = 50;
/* 0 means "use default early-exit match length". */
101 if (params->nice_match_len == 0)
102 params->nice_match_len = 24;
/* Clamp nice_match_len into [min_match_len, max_match_len]. */
104 if (params->nice_match_len < params->min_match_len)
105 params->nice_match_len = params->min_match_len;
107 if (params->nice_match_len > params->max_match_len)
108 params->nice_match_len = params->max_match_len;
112 lz_hc_params_valid(const struct lz_mf_params *_params)
114 struct lz_mf_params params = *_params;
116 lz_hc_set_default_params(¶ms);
118 /* Avoid edge case where min_match_len = 3, max_match_len = 2 */
119 return (params.min_match_len <= params.max_match_len);
/* Return the number of bytes of auxiliary memory needed for a window of
 * 'max_window_size' bytes: the hash-bucket head table (LZ_HC_HASH_LEN
 * entries) plus one chain link per window position, all u32-sized.
 * NOTE(review): the declaration/initialization of 'len' is elided from this
 * excerpt — presumably 'u64 len = 0;' or similar; confirm overflow behavior
 * for very large windows against the full source. */
123 lz_hc_get_needed_memory(u32 max_window_size)
/* Space for 'hash_tab' (bucket heads). */
127 len += LZ_HC_HASH_LEN;
/* Space for 'prev_tab' (one previous-position link per window byte). */
128 len += max_window_size;
/* 'len' counts u32 entries; convert to bytes. */
130 return len * sizeof(u32);
/* Initialize the hash-chains match-finder: apply default parameters,
 * allocate its lookup tables in one contiguous block, and make sure the
 * shared CRC-32 table has been built. */
134 lz_hc_init(struct lz_mf *_mf)
136 struct lz_hc *mf = (struct lz_hc *)_mf;
138 lz_hc_set_default_params(&mf->base.params);
140 /* Allocate space for 'hash_tab' and 'prev_tab'. */
142 mf->hash_tab = MALLOC(lz_hc_get_needed_memory(mf->base.params.max_window_size));
/* NOTE(review): the elided lines here presumably check the MALLOC result and
 * fail the init on allocation failure — confirm against the full source. */
/* 'prev_tab' lives immediately after the LZ_HC_HASH_LEN bucket heads in the
 * single allocation made above. */
146 mf->prev_tab = mf->hash_tab + LZ_HC_HASH_LEN;
148 /* Fill in the CRC32 table if not done already. */
149 pthread_once(&crc32_table_filled, crc32_init);
/* Prepare the match-finder for a new input window: clear all hash-bucket
 * heads (0 doubles as the "empty chain" sentinel) and prime the one-ahead
 * hash used by lz_hc_get_matches()/lz_hc_skip_position(). 'prev_tab' needs
 * no clearing because entries are written before they are ever read. */
155 lz_hc_load_window(struct lz_mf *_mf, const u8 window[], u32 size)
157 struct lz_hc *mf = (struct lz_hc *)_mf;
159 memset(mf->hash_tab, 0, LZ_HC_HASH_LEN * sizeof(u32));
/* Only hash when at least LZ_HC_HASH_BYTES bytes exist at position 0. */
161 if (size >= LZ_HC_HASH_BYTES)
162 mf->next_hash = lz_hc_hash(window);
/* Retrieve matches between the current window position and earlier positions
 * reachable through this position's hash chain. Matches of strictly
 * increasing length are appended to 'matches[]'; the search stops early once
 * 'nice_match_len' is reached or 'max_search_depth' chain links have been
 * examined. Advances the current window position by one byte.
 * NOTE(review): this excerpt elides several lines — the declarations of
 * 'hash', 'cur_match', 'len' and 'num_matches', the 'do {' opener of the
 * extension loop, the 'goto' targets ('next_match:', 'out:'), the '.len'
 * designated initializers, and the final 'return num_matches;'. The comments
 * below describe only what the visible lines establish. */
166 lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
168 struct lz_hc *mf = (struct lz_hc *)_mf;
169 const u8 * const window = mf->base.cur_window;
170 const u32 cur_pos = mf->base.cur_window_pos;
/* Pointer to the string being matched against. */
171 const u8 * const strptr = &window[cur_pos];
172 const u32 bytes_remaining = mf->base.cur_window_size - cur_pos;
173 u32 * const prev_tab = mf->prev_tab;
/* Early-exit length, clamped so we never read past the window end. */
174 const u32 nice_len = min(bytes_remaining, mf->base.params.nice_match_len);
/* Only matches longer than 'best_len' are reported, so seed it one below
 * the minimum reportable length. */
175 u32 best_len = mf->base.params.min_match_len - 1;
176 u32 depth_remaining = mf->base.params.max_search_depth;
/* Too close to the window end to report any match; bail out (elided). */
181 if (unlikely(bytes_remaining <= mf->base.params.min_match_len))
184 /* Insert the current position into the appropriate hash chain and set
185 * 'cur_match' to the previous head.
187 * For a slight performance improvement, we do each hash calculation one
188 * position in advance and prefetch the necessary hash table entry. */
190 hash = mf->next_hash;
191 mf->next_hash = lz_hc_hash(strptr + 1);
192 prefetch(&mf->hash_tab[mf->next_hash]);
193 cur_match = mf->hash_tab[hash];
194 mf->hash_tab[hash] = cur_pos;
195 prev_tab[cur_pos] = cur_match;
/* Walk the chain of earlier positions with the same hash; 0 terminates the
 * chain, and 'depth_remaining' bounds the total work. */
197 for (; cur_match && depth_remaining--; cur_match = prev_tab[cur_match]) {
199 const u8 * const matchptr = &window[cur_match];
202 /* Considering a match at 'matchptr'. */
204 /* The bytes at index 'best_len' are the most likely to differ,
205 * so check them first.
207 * The bytes at indices 'best_len - 1' and '0' are less
208 * important to check separately. But doing so still gives a
209 * slight performance improvement, probably because they create
210 * separate branches for the CPU to predict independently of the
211 * branches in the main comparison loops. */
212 if (matchptr[best_len] != strptr[best_len] ||
213 matchptr[best_len - 1] != strptr[best_len - 1] ||
214 matchptr[0] != strptr[0])
/* Verify the remaining interior bytes of a potential best_len+1 match. */
217 for (len = 1; len < best_len - 1; len++)
218 if (matchptr[len] != strptr[len])
221 /* We now know the match length is at least 'best_len + 1'. */
/* (Elided: 'len' is reset to best_len and a do-loop extends it.) */
226 if (++len == nice_len) {
227 /* 'nice_len' reached; don't waste time
228 * searching for longer matches. Extend the
229 * match as far as possible, record it, and
/* Absolute cap on the extension: window end or max_match_len. */
231 const u32 max_len = min(bytes_remaining,
232 mf->base.params.max_match_len);
233 while (len < max_len && strptr[len] == matchptr[len])
/* Record the final match and return (record fields partially elided). */
235 matches[num_matches++] = (struct lz_match) {
237 .offset = strptr - matchptr,
/* End of the byte-by-byte extension loop. */
241 } while (matchptr[len] == strptr[len]);
243 /* Found a longer match, but 'nice_len' not yet reached. */
245 matches[num_matches++] = (struct lz_match) {
247 .offset = strptr - matchptr,
251 /* Continue to next match in the chain. */
/* Done with this position; move the match-finder forward one byte. */
256 mf->base.cur_window_pos++;
/* Advance the match-finder past the current position without searching for
 * matches, while still inserting the position into its hash chain so that
 * later positions can match against it. Mirrors the insertion logic of
 * lz_hc_get_matches(), including the one-ahead hash + prefetch trick.
 * NOTE(review): the declaration of 'hash' and the body of the early-return
 * branch are elided from this excerpt — confirm against the full source. */
261 lz_hc_skip_position(struct lz_hc *mf)
263 const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
/* Too close to the window end for hashing to be useful/valid (elided body
 * presumably just advances the position and returns). */
266 if (unlikely(bytes_remaining <= mf->base.params.min_match_len))
269 hash = mf->next_hash;
/* Hash the *next* position in advance and prefetch its bucket. */
270 mf->next_hash = lz_hc_hash(lz_mf_get_window_ptr(&mf->base) + 1);
271 prefetch(&mf->hash_tab[mf->next_hash]);
/* Link the current position in front of the existing chain head. */
272 mf->prev_tab[mf->base.cur_window_pos] = mf->hash_tab[hash];
273 mf->hash_tab[hash] = mf->base.cur_window_pos;
276 mf->base.cur_window_pos++;
/* Skip the next 'n' positions: insert each into its hash chain without
 * reporting matches. NOTE(review): the loop construct around the call below
 * is elided from this excerpt (presumably a do/while over 'n'). */
280 lz_hc_skip_positions(struct lz_mf *_mf, u32 n)
282 struct lz_hc *mf = (struct lz_hc *)_mf;
285 lz_hc_skip_position(mf);
/* Free the match-finder's auxiliary memory. Only 'hash_tab' was allocated
 * ('prev_tab' points into the same block — see lz_hc_init()); the elided
 * body presumably frees it. */
290 lz_hc_destroy(struct lz_mf *_mf)
292 struct lz_hc *mf = (struct lz_hc *)_mf;
/* Operations vtable exposing the hash-chains match-finder through the
 * generic lz_mf interface. NOTE(review): the '.init' member line appears to
 * be elided from this excerpt (an init function exists above); confirm the
 * full initializer against the complete source. */
297 const struct lz_mf_ops lz_hash_chains_ops = {
298 .params_valid = lz_hc_params_valid,
299 .get_needed_memory = lz_hc_get_needed_memory,
301 .load_window = lz_hc_load_window,
302 .get_matches = lz_hc_get_matches,
303 .skip_positions = lz_hc_skip_positions,
304 .destroy = lz_hc_destroy,
/* Lets generic code allocate the concrete struct lz_hc. */
305 .struct_size = sizeof(struct lz_hc),