4 * Copyright (c) 2014 Eric Biggers. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
27 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #ifndef _HC_MATCHFINDER_H
31 #define _HC_MATCHFINDER_H
33 #include "wimlib/lz_extend.h"
34 #include "wimlib/lz_hash3.h"
35 #include "wimlib/matchfinder_common.h"
36 #include "wimlib/unaligned.h"
38 #ifndef HC_MATCHFINDER_HASH_ORDER
39 # if MATCHFINDER_WINDOW_ORDER < 14
40 # define HC_MATCHFINDER_HASH_ORDER 14
42 # define HC_MATCHFINDER_HASH_ORDER 15
46 #define HC_MATCHFINDER_HASH_LENGTH (1UL << HC_MATCHFINDER_HASH_ORDER)
48 #define HC_MATCHFINDER_TOTAL_LENGTH \
49 (HC_MATCHFINDER_HASH_LENGTH + MATCHFINDER_WINDOW_SIZE)
51 struct hc_matchfinder {
53 pos_t mf_data[HC_MATCHFINDER_TOTAL_LENGTH];
55 pos_t hash_tab[HC_MATCHFINDER_HASH_LENGTH];
56 pos_t next_tab[MATCHFINDER_WINDOW_SIZE];
59 } _aligned_attribute(MATCHFINDER_ALIGNMENT);
62 * Call before running the first byte through the matchfinder.
65 hc_matchfinder_init(struct hc_matchfinder *mf)
67 matchfinder_init(mf->hash_tab, HC_MATCHFINDER_HASH_LENGTH);
70 #if MATCHFINDER_IS_SLIDING
72 hc_matchfinder_slide_window(struct hc_matchfinder *mf)
74 matchfinder_rebase(mf->mf_data, HC_MATCHFINDER_TOTAL_LENGTH);
79 * Find the longest match longer than 'best_len'.
82 * The matchfinder structure.
84 * Pointer to the next byte in the input buffer to process _at the last
85 * time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
87 * Pointer to the next byte in the input buffer to process. This is the
88 * pointer to the bytes being matched against.
90 * Require a match at least this long.
92 * Maximum match length to return.
94 * Stop searching if a match of at least this length is found.
96 * Limit on the number of potential matches to consider.
98 * The match offset is returned here.
100 * Return the length of the match found, or 'best_len' if no match longer than
101 * 'best_len' was found.
103 static inline unsigned
104 hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
105 const u8 * const in_base,
106 const u8 * const in_next,
108 const unsigned max_len,
109 const unsigned nice_len,
110 const unsigned max_search_depth,
111 unsigned *offset_ret)
113 unsigned depth_remaining = max_search_depth;
114 const u8 *best_matchptr = best_matchptr; /* uninitialized */
121 /* Insert the current sequence into the appropriate hash chain. */
122 if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES))
124 first_3_bytes = load_u24_unaligned(in_next);
125 hash = lz_hash_u24(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
126 cur_match = mf->hash_tab[hash];
127 mf->next_tab[in_next - in_base] = cur_match;
128 mf->hash_tab[hash] = in_next - in_base;
130 if (unlikely(best_len >= max_len))
133 /* Search the appropriate hash chain for matches. */
135 if (!(matchfinder_match_in_window(cur_match, in_base, in_next)))
140 /* No length 3 match found yet.
141 * Check the first 3 bytes. */
142 matchptr = &in_base[cur_match];
144 if (load_u24_unaligned(matchptr) == first_3_bytes)
147 /* Not a match; keep trying. */
148 cur_match = mf->next_tab[
149 matchfinder_slot_for_match(cur_match)];
150 if (!matchfinder_match_in_window(cur_match,
153 if (!--depth_remaining)
157 /* Found a length 3 match. */
158 best_matchptr = matchptr;
159 best_len = lz_extend(in_next, best_matchptr, 3, max_len);
160 if (best_len >= nice_len)
162 cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
163 if (!matchfinder_match_in_window(cur_match, in_base, in_next))
165 if (!--depth_remaining)
171 matchptr = &in_base[cur_match];
173 /* Already found a length 3 match. Try for a longer match;
174 * start by checking the last 2 bytes and the first 4 bytes. */
175 #if UNALIGNED_ACCESS_IS_FAST
176 if ((load_u32_unaligned(matchptr + best_len - 3) ==
177 load_u32_unaligned(in_next + best_len - 3)) &&
178 (load_u32_unaligned(matchptr) ==
179 load_u32_unaligned(in_next)))
181 if (matchptr[best_len] == in_next[best_len])
185 cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
186 if (!matchfinder_match_in_window(cur_match, in_base, in_next))
188 if (!--depth_remaining)
192 if (UNALIGNED_ACCESS_IS_FAST)
196 len = lz_extend(in_next, matchptr, len, max_len);
197 if (len > best_len) {
199 best_matchptr = matchptr;
200 if (best_len >= nice_len)
203 cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
204 if (!matchfinder_match_in_window(cur_match, in_base, in_next))
206 if (!--depth_remaining)
210 *offset_ret = in_next - best_matchptr;
215 * Advance the match-finder, but don't search for matches.
218 * The matchfinder structure.
220 * Pointer to the next byte in the input buffer to process _at the last
221 * time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
223 * Pointer to the next byte in the input buffer to process.
225 * Pointer to the end of the input buffer.
227 * Number of bytes to skip; must be > 0.
230 hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
238 if (unlikely(in_next + count >= in_end - LZ_HASH_REQUIRED_NBYTES))
242 hash = lz_hash(in_next, HC_MATCHFINDER_HASH_ORDER);
243 mf->next_tab[in_next - in_base] = mf->hash_tab[hash];
244 mf->hash_tab[hash] = in_next - in_base;
249 #endif /* _HC_MATCHFINDER_H */