Faster XPRESS compression
[wimlib] / include / wimlib / bt_matchfinder.h
1 /*
2  * bt_matchfinder.h
3  *
4  * Copyright (c) 2014 Eric Biggers.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
27  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 #pragma once
31
32 #include "wimlib/lz_extend.h"
33 #include "wimlib/lz_hash3.h"
34 #include "wimlib/matchfinder_common.h"
35 #include "wimlib/unaligned.h"
36
37 #ifndef BT_MATCHFINDER_HASH_ORDER
38 #  if MATCHFINDER_WINDOW_ORDER < 14
39 #    define BT_MATCHFINDER_HASH_ORDER 14
40 #  else
41 #    define BT_MATCHFINDER_HASH_ORDER 15
42 #  endif
43 #endif
44
45 #define BT_MATCHFINDER_HASH_LENGTH      (1UL << BT_MATCHFINDER_HASH_ORDER)
46
47 #define BT_MATCHFINDER_TOTAL_LENGTH     \
48         (BT_MATCHFINDER_HASH_LENGTH + (2UL * MATCHFINDER_WINDOW_SIZE))
49
50 struct bt_matchfinder {
51         union {
52                 pos_t mf_data[BT_MATCHFINDER_TOTAL_LENGTH];
53                 struct {
54                         pos_t hash_tab[BT_MATCHFINDER_HASH_LENGTH];
55                         pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE];
56                 };
57         };
58 } _aligned_attribute(MATCHFINDER_ALIGNMENT);
59
60 static inline void
61 bt_matchfinder_init(struct bt_matchfinder *mf)
62 {
63         matchfinder_init(mf->hash_tab, BT_MATCHFINDER_HASH_LENGTH);
64 }
65
66 #if MATCHFINDER_IS_SLIDING
67 static inline void
68 bt_matchfinder_slide_window(struct bt_matchfinder *mf)
69 {
70         matchfinder_rebase(mf->mf_data, BT_MATCHFINDER_TOTAL_LENGTH);
71 }
72 #endif
73
74 static inline unsigned
75 bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
76                            const u8 * const in_base,
77                            const u8 * const in_next,
78                            const unsigned min_len,
79                            const unsigned max_len,
80                            const unsigned nice_len,
81                            const unsigned max_search_depth,
82                            unsigned long *prev_hash,
83                            struct lz_match * const restrict matches)
84 {
85         struct lz_match *lz_matchptr = matches;
86         unsigned depth_remaining = max_search_depth;
87         unsigned hash;
88         pos_t cur_match;
89         const u8 *matchptr;
90         unsigned best_len;
91         pos_t *pending_lt_ptr, *pending_gt_ptr;
92         unsigned best_lt_len, best_gt_len;
93         unsigned len;
94         pos_t *children;
95
96         if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES + 1))
97                 return 0;
98
99         hash = *prev_hash;
100         *prev_hash = lz_hash(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
101         prefetch(&mf->hash_tab[*prev_hash]);
102         cur_match = mf->hash_tab[hash];
103         mf->hash_tab[hash] = in_next - in_base;
104
105         best_len = min_len - 1;
106         pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
107         pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
108         best_lt_len = 0;
109         best_gt_len = 0;
110         for (;;) {
111                 if (!matchfinder_match_in_window(cur_match,
112                                                  in_base, in_next) ||
113                     !depth_remaining--)
114                 {
115                         *pending_lt_ptr = MATCHFINDER_INITVAL;
116                         *pending_gt_ptr = MATCHFINDER_INITVAL;
117                         return lz_matchptr - matches;
118                 }
119
120                 matchptr = &in_base[cur_match];
121                 len = min(best_lt_len, best_gt_len);
122
123                 children = &mf->child_tab[(unsigned long)
124                                 matchfinder_slot_for_match(cur_match) << 1];
125
126                 if (matchptr[len] == in_next[len]) {
127
128                         len = lz_extend(in_next, matchptr, len + 1, max_len);
129
130                         if (len > best_len) {
131                                 best_len = len;
132
133                                 lz_matchptr->length = len;
134                                 lz_matchptr->offset = in_next - matchptr;
135                                 lz_matchptr++;
136
137                                 if (len >= nice_len) {
138                                         *pending_lt_ptr = children[0];
139                                         *pending_gt_ptr = children[1];
140                                         return lz_matchptr - matches;
141                                 }
142                         }
143                 }
144
145                 if (matchptr[len] < in_next[len]) {
146                         *pending_lt_ptr = cur_match;
147                         pending_lt_ptr = &children[1];
148                         cur_match = *pending_lt_ptr;
149                         best_lt_len = len;
150                 } else {
151                         *pending_gt_ptr = cur_match;
152                         pending_gt_ptr = &children[0];
153                         cur_match = *pending_gt_ptr;
154                         best_gt_len = len;
155                 }
156         }
157 }
158
159 static inline void
160 bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
161                              const u8 * const in_base,
162                              const u8 * const in_next,
163                              const u8 * const in_end,
164                              const unsigned nice_len,
165                              const unsigned max_search_depth,
166                              unsigned long *prev_hash)
167 {
168         unsigned depth_remaining = max_search_depth;
169         unsigned hash;
170         pos_t cur_match;
171         const u8 *matchptr;
172         pos_t *pending_lt_ptr, *pending_gt_ptr;
173         unsigned best_lt_len, best_gt_len;
174         unsigned len;
175         pos_t *children;
176
177         if (unlikely(in_end - in_next < LZ_HASH_REQUIRED_NBYTES + 1))
178                 return;
179
180         hash = *prev_hash;
181         *prev_hash = lz_hash(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
182         prefetch(&mf->hash_tab[*prev_hash]);
183         cur_match = mf->hash_tab[hash];
184         mf->hash_tab[hash] = in_next - in_base;
185
186         depth_remaining = max_search_depth;
187         pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
188         pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
189         best_lt_len = 0;
190         best_gt_len = 0;
191         for (;;) {
192                 if (!matchfinder_match_in_window(cur_match,
193                                                  in_base, in_next) ||
194                     !depth_remaining--)
195                 {
196                         *pending_lt_ptr = MATCHFINDER_INITVAL;
197                         *pending_gt_ptr = MATCHFINDER_INITVAL;
198                         return;
199                 }
200
201                 matchptr = &in_base[cur_match];
202                 len = min(best_lt_len, best_gt_len);
203
204                 children = &mf->child_tab[(unsigned long)
205                                 matchfinder_slot_for_match(cur_match) << 1];
206
207                 if (matchptr[len] == in_next[len]) {
208                         len = lz_extend(in_next, matchptr, len + 1, nice_len);
209                         if (len == nice_len) {
210                                 *pending_lt_ptr = children[0];
211                                 *pending_gt_ptr = children[1];
212                                 return;
213                         }
214                 }
215
216                 if (matchptr[len] < in_next[len]) {
217                         *pending_lt_ptr = cur_match;
218                         pending_lt_ptr = &children[1];
219                         cur_match = *pending_lt_ptr;
220                         best_lt_len = len;
221                 } else {
222                         *pending_gt_ptr = cur_match;
223                         pending_gt_ptr = &children[0];
224                         cur_match = *pending_gt_ptr;
225                         best_gt_len = len;
226                 }
227         }
228 }