]> wimlib.net Git - wimlib/blob - include/wimlib/bt_matchfinder.h
Misc. cleanups
[wimlib] / include / wimlib / bt_matchfinder.h
1 /*
2  * bt_matchfinder.h
3  *
4  * Copyright (c) 2014 Eric Biggers.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
27  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 #ifndef _BT_MATCHFINDER_H
31 #define _BT_MATCHFINDER_H
32
33 #include "wimlib/lz_extend.h"
34 #include "wimlib/lz_hash3.h"
35 #include "wimlib/matchfinder_common.h"
36 #include "wimlib/unaligned.h"
37
38 #ifndef BT_MATCHFINDER_HASH_ORDER
39 #  if MATCHFINDER_WINDOW_ORDER < 14
40 #    define BT_MATCHFINDER_HASH_ORDER 14
41 #  else
42 #    define BT_MATCHFINDER_HASH_ORDER 15
43 #  endif
44 #endif
45
46 #define BT_MATCHFINDER_HASH_LENGTH      (1UL << BT_MATCHFINDER_HASH_ORDER)
47
48 #define BT_MATCHFINDER_TOTAL_LENGTH     \
49         (BT_MATCHFINDER_HASH_LENGTH + (2UL * MATCHFINDER_WINDOW_SIZE))
50
51 struct bt_matchfinder {
52         union {
53                 pos_t mf_data[BT_MATCHFINDER_TOTAL_LENGTH];
54                 struct {
55                         pos_t hash_tab[BT_MATCHFINDER_HASH_LENGTH];
56                         pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE];
57                 };
58         };
59 } _aligned_attribute(MATCHFINDER_ALIGNMENT);
60
61 static inline void
62 bt_matchfinder_init(struct bt_matchfinder *mf)
63 {
64         matchfinder_init(mf->hash_tab, BT_MATCHFINDER_HASH_LENGTH);
65 }
66
67 #if MATCHFINDER_IS_SLIDING
68 static inline void
69 bt_matchfinder_slide_window(struct bt_matchfinder *mf)
70 {
71         matchfinder_rebase(mf->mf_data, BT_MATCHFINDER_TOTAL_LENGTH);
72 }
73 #endif
74
75 static inline unsigned
76 bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
77                            const u8 * const in_base,
78                            const u8 * const in_next,
79                            const unsigned min_len,
80                            const unsigned max_len,
81                            const unsigned nice_len,
82                            const unsigned max_search_depth,
83                            unsigned long *prev_hash,
84                            struct lz_match * const restrict matches)
85 {
86         struct lz_match *lz_matchptr = matches;
87         unsigned depth_remaining = max_search_depth;
88         unsigned hash;
89         pos_t cur_match;
90         const u8 *matchptr;
91         unsigned best_len;
92         pos_t *pending_lt_ptr, *pending_gt_ptr;
93         unsigned best_lt_len, best_gt_len;
94         unsigned len;
95         pos_t *children;
96
97         if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES + 1))
98                 return 0;
99
100         hash = *prev_hash;
101         *prev_hash = lz_hash(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
102         prefetch(&mf->hash_tab[*prev_hash]);
103         cur_match = mf->hash_tab[hash];
104         mf->hash_tab[hash] = in_next - in_base;
105
106         best_len = min_len - 1;
107         pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
108         pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
109         best_lt_len = 0;
110         best_gt_len = 0;
111         for (;;) {
112                 if (!matchfinder_match_in_window(cur_match,
113                                                  in_base, in_next) ||
114                     !depth_remaining--)
115                 {
116                         *pending_lt_ptr = MATCHFINDER_INITVAL;
117                         *pending_gt_ptr = MATCHFINDER_INITVAL;
118                         return lz_matchptr - matches;
119                 }
120
121                 matchptr = &in_base[cur_match];
122                 len = min(best_lt_len, best_gt_len);
123
124                 children = &mf->child_tab[(unsigned long)
125                                 matchfinder_slot_for_match(cur_match) << 1];
126
127                 if (matchptr[len] == in_next[len]) {
128
129                         len = lz_extend(in_next, matchptr, len + 1, max_len);
130
131                         if (len > best_len) {
132                                 best_len = len;
133
134                                 lz_matchptr->length = len;
135                                 lz_matchptr->offset = in_next - matchptr;
136                                 lz_matchptr++;
137
138                                 if (len >= nice_len) {
139                                         *pending_lt_ptr = children[0];
140                                         *pending_gt_ptr = children[1];
141                                         return lz_matchptr - matches;
142                                 }
143                         }
144                 }
145
146                 if (matchptr[len] < in_next[len]) {
147                         *pending_lt_ptr = cur_match;
148                         pending_lt_ptr = &children[1];
149                         cur_match = *pending_lt_ptr;
150                         best_lt_len = len;
151                 } else {
152                         *pending_gt_ptr = cur_match;
153                         pending_gt_ptr = &children[0];
154                         cur_match = *pending_gt_ptr;
155                         best_gt_len = len;
156                 }
157         }
158 }
159
160 static inline void
161 bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
162                              const u8 * const in_base,
163                              const u8 * const in_next,
164                              const u8 * const in_end,
165                              const unsigned nice_len,
166                              const unsigned max_search_depth,
167                              unsigned long *prev_hash)
168 {
169         unsigned depth_remaining = max_search_depth;
170         unsigned hash;
171         pos_t cur_match;
172         const u8 *matchptr;
173         pos_t *pending_lt_ptr, *pending_gt_ptr;
174         unsigned best_lt_len, best_gt_len;
175         unsigned len;
176         pos_t *children;
177
178         if (unlikely(in_end - in_next < LZ_HASH_REQUIRED_NBYTES + 1))
179                 return;
180
181         hash = *prev_hash;
182         *prev_hash = lz_hash(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
183         prefetch(&mf->hash_tab[*prev_hash]);
184         cur_match = mf->hash_tab[hash];
185         mf->hash_tab[hash] = in_next - in_base;
186
187         depth_remaining = max_search_depth;
188         pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
189         pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
190         best_lt_len = 0;
191         best_gt_len = 0;
192         for (;;) {
193                 if (!matchfinder_match_in_window(cur_match,
194                                                  in_base, in_next) ||
195                     !depth_remaining--)
196                 {
197                         *pending_lt_ptr = MATCHFINDER_INITVAL;
198                         *pending_gt_ptr = MATCHFINDER_INITVAL;
199                         return;
200                 }
201
202                 matchptr = &in_base[cur_match];
203                 len = min(best_lt_len, best_gt_len);
204
205                 children = &mf->child_tab[(unsigned long)
206                                 matchfinder_slot_for_match(cur_match) << 1];
207
208                 if (matchptr[len] == in_next[len]) {
209                         len = lz_extend(in_next, matchptr, len + 1, nice_len);
210                         if (len == nice_len) {
211                                 *pending_lt_ptr = children[0];
212                                 *pending_gt_ptr = children[1];
213                                 return;
214                         }
215                 }
216
217                 if (matchptr[len] < in_next[len]) {
218                         *pending_lt_ptr = cur_match;
219                         pending_lt_ptr = &children[1];
220                         cur_match = *pending_lt_ptr;
221                         best_lt_len = len;
222                 } else {
223                         *pending_gt_ptr = cur_match;
224                         pending_gt_ptr = &children[0];
225                         cur_match = *pending_gt_ptr;
226                         best_gt_len = len;
227                 }
228         }
229 }
230
231 #endif /* _BT_MATCHFINDER_H */