4 * Common code for Lempel-Ziv matchfinding.
6 * Copyright (c) 2014 Eric Biggers. All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #ifndef _MATCHFINDER_COMMON_H
33 #define _MATCHFINDER_COMMON_H
35 #include "wimlib/types.h"
39 #ifndef MATCHFINDER_WINDOW_ORDER
40 # error "MATCHFINDER_WINDOW_ORDER must be defined!"
43 #ifndef MATCHFINDER_IS_SLIDING
44 # error "MATCHFINDER_IS_SLIDING must be defined!"
47 #define MATCHFINDER_WINDOW_SIZE ((size_t)1 << MATCHFINDER_WINDOW_ORDER)
49 #if MATCHFINDER_IS_SLIDING
50 # include "matchfinder_sliding.h"
52 # include "matchfinder_nonsliding.h"
55 #define MATCHFINDER_ALIGNMENT 8
58 # include "matchfinder_avx2.h"
59 # if MATCHFINDER_ALIGNMENT < 32
60 # undef MATCHFINDER_ALIGNMENT
61 # define MATCHFINDER_ALIGNMENT 32
66 # include "matchfinder_sse2.h"
67 # if MATCHFINDER_ALIGNMENT < 16
68 # undef MATCHFINDER_ALIGNMENT
69 # define MATCHFINDER_ALIGNMENT 16
74 * Representation of a match.
78 /* The number of bytes matched. */
81 /* The offset back from the current position that was matched. */
86 matchfinder_memset_init_okay(void)
88 /* All bytes must match in order to use memset. */
89 const pos_t v = MATCHFINDER_INITVAL;
90 if (sizeof(pos_t) == 2)
91 return (u8)v == (u8)(v >> 8);
92 if (sizeof(pos_t) == 4)
93 return (u8)v == (u8)(v >> 8) &&
94 (u8)v == (u8)(v >> 16) &&
95 (u8)v == (u8)(v >> 24);
100 * Initialize the hash table portion of the matchfinder.
102 * Essentially, this is an optimized memset().
104 * 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary.
107 matchfinder_init(pos_t *data, size_t num_entries)
109 const size_t size = num_entries * sizeof(data[0]);
112 if (matchfinder_init_avx2(data, size))
117 if (matchfinder_init_sse2(data, size))
121 if (matchfinder_memset_init_okay()) {
122 memset(data, (u8)MATCHFINDER_INITVAL, size);
126 for (size_t i = 0; i < num_entries; i++)
127 data[i] = MATCHFINDER_INITVAL;
130 #if MATCHFINDER_IS_SLIDING
132 * Slide the matchfinder by WINDOW_SIZE bytes.
134 * This must be called just after each WINDOW_SIZE bytes have been run through
137 * This will subtract WINDOW_SIZE bytes from each entry in the array specified.
138 * The effect is that all entries are updated to be relative to the current
139 * position, rather than the position WINDOW_SIZE bytes prior.
141 * Underflow is detected and replaced with signed saturation. This ensures that
142 * once the sliding window has passed over a position, that position forever
143 * remains out of bounds.
145 * The array passed in must contain all matchfinder data that is
146 * position-relative. Concretely, this will include the hash table as well as
147 * the table of positions that is used to link together the sequences in each
148 * hash bucket. Note that in the latter table, the links are 1-ary in the case
149 * of "hash chains", and 2-ary in the case of "binary trees". In either case,
150 * the links need to be rebased in the same way.
153 matchfinder_rebase(pos_t *data, size_t num_entries)
155 const size_t size = num_entries * sizeof(data[0]);
158 if (matchfinder_rebase_avx2(data, size))
163 if (matchfinder_rebase_sse2(data, size))
167 if (MATCHFINDER_WINDOW_SIZE == 32768) {
168 /* Branchless version for 32768 byte windows. If the value was
169 * already negative, clear all bits except the sign bit; this
170 * changes the value to -32768. Otherwise, set the sign bit;
171 * this is equivalent to subtracting 32768. */
172 for (size_t i = 0; i < num_entries; i++) {
174 u16 sign_bit = v & 0x8000;
175 v &= sign_bit - ((sign_bit >> 15) ^ 1);
182 for (size_t i = 0; i < num_entries; i++) {
184 data[i] -= (pos_t)-MATCHFINDER_WINDOW_SIZE;
186 data[i] = (pos_t)-MATCHFINDER_WINDOW_SIZE;
189 #endif /* MATCHFINDER_IS_SLIDING */
191 #endif /* _MATCHFINDER_COMMON_H */