4 * Copyright (c) 2014 Eric Biggers. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
27 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #include "wimlib/lz_extend.h"
33 #include "wimlib/lz_hash3.h"
34 #include "wimlib/matchfinder_common.h"
35 #include "wimlib/unaligned.h"
37 #ifndef HC_MATCHFINDER_HASH_ORDER
38 # if MATCHFINDER_WINDOW_ORDER < 14
39 # define HC_MATCHFINDER_HASH_ORDER 14
41 # define HC_MATCHFINDER_HASH_ORDER 15
45 #define HC_MATCHFINDER_HASH_LENGTH (1UL << HC_MATCHFINDER_HASH_ORDER)
47 #define HC_MATCHFINDER_TOTAL_LENGTH \
48 (HC_MATCHFINDER_HASH_LENGTH + MATCHFINDER_WINDOW_SIZE)
50 struct hc_matchfinder {
52 pos_t mf_data[HC_MATCHFINDER_TOTAL_LENGTH];
54 pos_t hash_tab[HC_MATCHFINDER_HASH_LENGTH];
55 pos_t child_tab[MATCHFINDER_WINDOW_SIZE];
58 } _aligned_attribute(MATCHFINDER_ALIGNMENT);
61 * Call before running the first byte through the matchfinder.
64 hc_matchfinder_init(struct hc_matchfinder *mf)
66 matchfinder_init(mf->hash_tab, HC_MATCHFINDER_HASH_LENGTH);
69 #if MATCHFINDER_IS_SLIDING
71 hc_matchfinder_slide_window(struct hc_matchfinder *mf)
73 matchfinder_rebase(mf->mf_data, HC_MATCHFINDER_TOTAL_LENGTH);
78 * Find the longest match longer than 'best_len'.
81 * The matchfinder structure.
83 * Pointer to the next byte in the input buffer to process _at the last
84 * time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
86 * Pointer to the next byte in the input buffer to process. This is the
87 * pointer to the bytes being matched against.
89 * Require a match at least this long.
91 * Maximum match length to return.
93 * Stop searching if a match of at least this length is found.
95 * Limit on the number of potential matches to consider.
97 * The match offset is returned here.
99 * Return the length of the match found, or 'best_len' if no match longer than
100 * 'best_len' was found.
102 static inline unsigned
103 hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
104 const u8 * const in_base,
105 const u8 * const in_next,
107 const unsigned max_len,
108 const unsigned nice_len,
109 const unsigned max_search_depth,
110 unsigned *offset_ret)
112 unsigned depth_remaining = max_search_depth;
113 const u8 *best_matchptr = best_matchptr; /* uninitialized */
120 /* Insert the current sequence into the appropriate hash chain. */
121 if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES))
123 first_3_bytes = load_u24_unaligned(in_next);
124 hash = lz_hash_u24(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
125 cur_match = mf->hash_tab[hash];
126 mf->child_tab[in_next - in_base] = cur_match;
127 mf->hash_tab[hash] = in_next - in_base;
129 if (unlikely(best_len >= max_len))
132 /* Search the appropriate hash chain for matches. */
134 if (!(matchfinder_match_in_window(cur_match, in_base, in_next)))
139 /* No length 3 match found yet.
140 * Check the first 3 bytes. */
141 matchptr = &in_base[cur_match];
143 if (load_u24_unaligned(matchptr) == first_3_bytes)
146 /* Not a match; keep trying. */
147 cur_match = mf->child_tab[
148 matchfinder_slot_for_match(cur_match)];
149 if (!matchfinder_match_in_window(cur_match,
152 if (!--depth_remaining)
156 /* Found a length 3 match. */
157 best_matchptr = matchptr;
158 best_len = lz_extend(in_next, best_matchptr, 3, max_len);
159 if (best_len >= nice_len)
161 cur_match = mf->child_tab[matchfinder_slot_for_match(cur_match)];
162 if (!matchfinder_match_in_window(cur_match, in_base, in_next))
164 if (!--depth_remaining)
170 matchptr = &in_base[cur_match];
172 /* Already found a length 3 match. Try for a longer match;
173 * start by checking the last 2 bytes and the first 4 bytes. */
174 #if UNALIGNED_ACCESS_IS_FAST
175 if ((load_u32_unaligned(matchptr + best_len - 3) ==
176 load_u32_unaligned(in_next + best_len - 3)) &&
177 (load_u32_unaligned(matchptr) ==
178 load_u32_unaligned(in_next)))
180 if (matchptr[best_len] == in_next[best_len])
184 cur_match = mf->child_tab[matchfinder_slot_for_match(cur_match)];
185 if (!matchfinder_match_in_window(cur_match, in_base, in_next))
187 if (!--depth_remaining)
191 #if UNALIGNED_ACCESS_IS_FAST
196 len = lz_extend(in_next, matchptr, len, max_len);
197 if (len > best_len) {
199 best_matchptr = matchptr;
200 if (best_len >= nice_len)
203 cur_match = mf->child_tab[matchfinder_slot_for_match(cur_match)];
204 if (!matchfinder_match_in_window(cur_match, in_base, in_next))
206 if (!--depth_remaining)
210 *offset_ret = in_next - best_matchptr;
215 * Advance the match-finder, but don't search for matches.
218 * The matchfinder structure.
220 * Pointer to the next byte in the input buffer to process _at the last
221 * time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
223 * Pointer to the next byte in the input buffer to process.
225 * Pointer to the end of the input buffer.
227 * Number of bytes to skip; must be > 0.
230 hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
238 if (unlikely(in_next + count >= in_end - LZ_HASH_REQUIRED_NBYTES))
242 hash = lz_hash(in_next, HC_MATCHFINDER_HASH_ORDER);
243 mf->child_tab[in_next - in_base] = mf->hash_tab[hash];
244 mf->hash_tab[hash] = in_next - in_base;