4 * A match-finder for Lempel-Ziv compression based on bottom-up construction and
5 * traversal of the Longest Common Prefix (LCP) interval tree.
7 * Copyright 2022 Eric Biggers
9 * Permission is hereby granted, free of charge, to any person
10 * obtaining a copy of this software and associated documentation
11 * files (the "Software"), to deal in the Software without
12 * restriction, including without limitation the rights to use,
13 * copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 * OTHER DEALINGS IN THE SOFTWARE.
37 #include "wimlib/divsufsort.h"
38 #include "wimlib/lcpit_matchfinder.h"
39 #include "wimlib/util.h"
/*
 * Layout constants for "normal" mode: each 32-bit SA_and_LCP/intervals entry
 * packs an LCP value in the top LCP_BITS bits and a position (or interval
 * index) in the low (32 - LCP_BITS) bits.
 * NOTE(review): the definition of LCP_BITS itself is not visible in this
 * excerpt — confirm against the full file.
 */
42 #define LCP_MAX (((u32)1 << LCP_BITS) - 1)
43 #define LCP_SHIFT (32 - LCP_BITS)
44 #define LCP_MASK (LCP_MAX << LCP_SHIFT)
45 #define POS_MASK (((u32)1 << (32 - LCP_BITS)) - 1)
/* Largest buffer size the 32-bit-entry ("normal") mode can address */
46 #define MAX_NORMAL_BUFSIZE (POS_MASK + 1)
/*
 * Layout constants for "huge" mode: each 64-bit entry packs a 7-bit LCP value
 * in the top bits, a 32-bit position in the low bits, and uses bit 32
 * (HUGE_UNVISITED_TAG) as the "interval not yet visited" flag.
 */
48 #define HUGE_LCP_BITS 7
49 #define HUGE_LCP_MAX (((u32)1 << HUGE_LCP_BITS) - 1)
50 #define HUGE_LCP_SHIFT (64 - HUGE_LCP_BITS)
51 #define HUGE_LCP_MASK ((u64)HUGE_LCP_MAX << HUGE_LCP_SHIFT)
52 #define HUGE_POS_MASK 0xFFFFFFFF
53 #define MAX_HUGE_BUFSIZE ((u64)HUGE_POS_MASK + 1)
54 #define HUGE_UNVISITED_TAG 0x100000000
/* Number of padding entries kept past the end of the arrays so the
 * read-ahead prefetches in the hot loops never index out of bounds
 * (see lcpit_matchfinder_load_buffer(), which zeroes these entries). */
56 #define PREFETCH_SAFETY 5
59 * Build the LCP (Longest Common Prefix) array in linear time.
61 * LCP[r] will be the length of the longest common prefix between the suffixes
62 * with positions SA[r - 1] and SA[r]. LCP[0] will be undefined.
64 * Algorithm taken from Kasai et al. (2001), but modified slightly:
66 * - With bytes there is no realistic way to reserve a unique symbol for
67 * end-of-buffer, so use explicit checks for end-of-buffer.
69 * - For decreased memory usage and improved memory locality, pack the two
70 * logically distinct SA and LCP arrays into a single array SA_and_LCP.
72 * - Since SA_and_LCP is accessed randomly, improve the cache behavior by
73 * reading several entries ahead in ISA and prefetching the upcoming
76 * - If an LCP value is less than the minimum match length, then store 0. This
77 * avoids having to do comparisons against the minimum match length later.
79 * - If an LCP value is greater than the "nice match length", then store the
80 * "nice match length". This caps the number of bits needed to store each
81 * LCP value, and this caps the depth of the LCP-interval tree, without
82 * usually hurting the compression ratio too much.
86 * Kasai et al. 2001. Linear-Time Longest-Common-Prefix Computation in
87 * Suffix Arrays and Its Applications. CPM '01 Proceedings of the 12th
88 * Annual Symposium on Combinatorial Pattern Matching pp. 181-192.
/*
 * Compute the LCP value for each suffix-array rank and OR it into the high
 * LCP_BITS of the corresponding SA_and_LCP[] entry (Kasai et al. 2001 — see
 * the comment block above for the algorithm description and references).
 * LCP values below @min_lcp are stored as 0; values above @max_lcp are
 * clamped to @max_lcp.
 * NOTE(review): several interior lines (e.g. the declarations of 'r', 'h',
 * and 'stored_lcp', and the closing braces) are not visible in this excerpt.
 */
91 build_LCP(u32 SA_and_LCP[restrict], const u32 ISA[restrict],
92 const u8 T[restrict], const u32 n,
93 const u32 min_lcp, const u32 max_lcp)
96 for (u32 i = 0; i < n; i++) {
/* Read ahead in ISA and prefetch (for write) the SA_and_LCP entry that
 * will be updated PREFETCH_SAFETY iterations from now. */
98 prefetchw(&SA_and_LCP[ISA[i + PREFETCH_SAFETY]]);
/* j = position of the suffix that precedes suffix i in rank order */
100 const u32 j = SA_and_LCP[r - 1] & POS_MASK;
/* Never compare past the end of the buffer (no sentinel symbol). */
101 const u32 lim = min(n - i, n - j);
102 while (h < lim && T[i + h] == T[j + h])
/* Clamp the stored LCP: below the minimum match length store 0,
 * above the nice match length store the nice match length. */
105 if (stored_lcp < min_lcp)
107 else if (stored_lcp > max_lcp)
108 stored_lcp = max_lcp;
/* Pack the LCP into the high bits alongside the position. */
109 SA_and_LCP[r] |= stored_lcp << LCP_SHIFT;
117 * Use the suffix array accompanied with the longest-common-prefix array ---
118 * which in combination can be called the "enhanced suffix array" --- to
119 * simulate a bottom-up traversal of the corresponding suffix tree, or
120 * equivalently the lcp-interval tree. Do so in suffix rank order, but save the
121 * superinterval references needed for later bottom-up traversal of the tree in
122 * suffix position order.
124 * To enumerate the lcp-intervals, this algorithm scans the suffix array and its
125 * corresponding LCP array linearly. While doing so, it maintains a stack of
126 * lcp-intervals that are currently open, meaning that their left boundaries
127 * have been seen but their right boundaries have not. The bottom of the stack
128 * is the interval which covers the entire suffix array (this has lcp=0), and
129 * the top of the stack is the deepest interval that is currently open (this has
130 * the greatest lcp of any interval on the stack). When this algorithm opens an
131 * lcp-interval, it assigns it a unique index in intervals[] and pushes it onto
132 * the stack. When this algorithm closes an interval, it pops it from the stack
133 * and sets the intervals[] entry of that interval to the index and lcp of that
134 * interval's superinterval, which is the new top of the stack.
136 * This algorithm also sets pos_data[pos] for each suffix position 'pos' to the
137 * index and lcp of the deepest lcp-interval containing it. Alternatively, we
138 * can interpret each suffix as being associated with a singleton lcp-interval,
139 * or leaf of the suffix tree. With this interpretation, an entry in pos_data[]
140 * is the superinterval reference for one of these singleton lcp-intervals and
141 * therefore is not fundamentally different from an entry in intervals[].
143 * To reduce memory usage, this algorithm re-uses the suffix array's memory to
144 * store the generated intervals[] array. This is possible because SA and LCP
145 * are accessed linearly, and no more than one interval is generated per suffix.
147 * The techniques used in this algorithm are described in various published
148 * papers. The generation of lcp-intervals from the suffix array (SA) and the
149 * longest-common-prefix array (LCP) is given as Algorithm BottomUpTraverse in
150 * Kasai et al. (2001) and Algorithm 4.1 ("Computation of lcp-intervals") in
151 * Abouelhoda et al. (2004). Both these papers note the equivalence between
152 * lcp-intervals (including the singleton lcp-interval for each suffix) and
153 * nodes of the suffix tree. Abouelhoda et al. (2004) furthermore applies
154 * bottom-up traversal of the lcp-interval tree to Lempel-Ziv factorization, as
155 * does Chen et al. (2008). Algorithm CPS1b of Chen et al. (2008) dynamically
156 * re-uses the suffix array during bottom-up traversal of the lcp-interval tree.
160 * Kasai et al. Linear-Time Longest-Common-Prefix Computation in Suffix
161 * Arrays and Its Applications. 2001. CPM '01 Proceedings of the 12th
162 * Annual Symposium on Combinatorial Pattern Matching pp. 181-192.
164 * M.I. Abouelhoda, S. Kurtz, E. Ohlebusch. 2004. Replacing Suffix Trees
165 * With Enhanced Suffix Arrays. Journal of Discrete Algorithms Volume 2
166 * Issue 1, March 2004, pp. 53-86.
168 * G. Chen, S.J. Puglisi, W.F. Smyth. 2008. Lempel-Ziv Factorization
169 * Using Less Time & Space. Mathematics in Computer Science June 2008,
170 * Volume 1, Issue 4, pp. 605-623.
/*
 * Build the LCP-interval tree in-place over the SA_and_LCP array, as
 * described in the comment block above: scan in rank order, maintain a stack
 * of open lcp-intervals, and store each closed interval's superinterval
 * reference (index | lcp) back into intervals[], while pos_data[pos] receives
 * the deepest interval containing each suffix position.
 * NOTE(review): some lines are not visible in this excerpt (e.g. the 'else {'
 * branches, the inner loop header around lines L134-L147, closing braces, and
 * the 'prev_pos = next_pos' update) — the visible lines are a fragment.
 */
173 build_LCPIT(u32 intervals[restrict], u32 pos_data[restrict], const u32 n)
175 u32 * const SA_and_LCP = intervals;
176 u32 next_interval_idx;
/* Stack of currently-open intervals; depth is bounded by LCP_MAX
 * because stored LCP values were clamped in build_LCP(). */
177 u32 open_intervals[LCP_MAX + 1];
178 u32 *top = open_intervals;
179 u32 prev_pos = SA_and_LCP[0] & POS_MASK;
/* Interval 0 is the root; generated intervals start at index 1. */
183 next_interval_idx = 1;
185 for (u32 r = 1; r < n; r++) {
186 const u32 next_pos = SA_and_LCP[r] & POS_MASK;
187 const u32 next_lcp = SA_and_LCP[r] & LCP_MASK;
188 const u32 top_lcp = *top & LCP_MASK;
190 prefetchw(&pos_data[SA_and_LCP[r + PREFETCH_SAFETY] & POS_MASK]);
192 if (next_lcp == top_lcp) {
193 /* Continuing the deepest open interval */
194 pos_data[prev_pos] = *top;
195 } else if (next_lcp > top_lcp) {
196 /* Opening a new interval */
197 *++top = next_lcp | next_interval_idx++;
198 pos_data[prev_pos] = *top;
200 /* Closing the deepest open interval */
201 pos_data[prev_pos] = *top;
203 const u32 closed_interval_idx = *top-- & POS_MASK;
204 const u32 superinterval_lcp = *top & LCP_MASK;
206 if (next_lcp == superinterval_lcp) {
207 /* Continuing the superinterval */
208 intervals[closed_interval_idx] = *top;
210 } else if (next_lcp > superinterval_lcp) {
211 /* Creating a new interval that is a
212 * superinterval of the one being
213 * closed, but still a subinterval of
214 * its superinterval */
215 *++top = next_lcp | next_interval_idx++;
216 intervals[closed_interval_idx] = *top;
219 /* Also closing the superinterval */
220 intervals[closed_interval_idx] = *top;
227 /* Close any still-open intervals. */
228 pos_data[prev_pos] = *top;
229 for (; top > open_intervals; top--)
230 intervals[*top & POS_MASK] = *(top - 1);
234 * Advance the LCP-interval tree matchfinder by one byte.
236 * If @record_matches is true, then matches are written to the @matches array
237 * sorted by strictly decreasing length and strictly decreasing offset, and the
238 * return value is the number of matches found. Otherwise, @matches is ignored
239 * and the return value is always 0.
241 * How this algorithm works:
243 * 'cur_pos' is the position of the current suffix, which is the suffix being
244 * matched against. 'cur_pos' starts at 0 and is incremented each time this
245 * function is called. This function finds each suffix with position less than
246 * 'cur_pos' that shares a prefix with the current suffix, but for each distinct
247 * prefix length it finds only the suffix with the greatest position (i.e. the
248 * most recently seen in the linear traversal by position). This function
249 * accomplishes this using the lcp-interval tree data structure that was built
250 * by build_LCPIT() and is updated dynamically by this function.
252 * The first step is to read 'pos_data[cur_pos]', which gives the index and lcp
253 * value of the deepest lcp-interval containing the current suffix --- or,
254 * equivalently, the parent of the conceptual singleton lcp-interval that
255 * contains the current suffix.
257 * The second step is to ascend the lcp-interval tree until reaching an interval
258 * that has not yet been visited, and link the intervals to the current suffix
259 * along the way. An lcp-interval has been visited if and only if it has been
260 * linked to a suffix. Initially, no lcp-intervals are linked to suffixes.
262 * The third step is to continue ascending the lcp-interval tree, but indirectly
263 * through suffix links rather than through the original superinterval
264 * references, and continuing to form links with the current suffix along the
265 * way. Each suffix visited during this step, except in a special case to
266 * handle outdated suffixes, is a match which can be written to matches[]. Each
267 * intervals[] entry contains the position of the next suffix to visit, which we
268 * shall call 'match_pos'; this is the most recently seen suffix that belongs to
269 * that lcp-interval. 'pos_data[match_pos]' then contains the lcp and interval
270 * index of the next lcp-interval that should be visited.
272 * We can view these arrays as describing a new set of links that gets overlaid
273 * on top of the original superinterval references of the lcp-interval tree.
274 * Each such link can connect a node of the lcp-interval tree to an ancestor
275 * more than one generation removed.
277 * For each one-byte advance, the current position becomes the most recently
278 * seen suffix for a continuous sequence of lcp-intervals from a leaf interval
279 * to the root interval. Conceptually, this algorithm needs to update all these
280 * nodes to link to 'cur_pos', and then update 'pos_data[cur_pos]' to a "null"
281 * link. But actually, if some of these nodes have the same most recently seen
282 * suffix, then this algorithm just visits the pos_data[] entry for that suffix
283 * and skips over all these nodes in one step. Updating the extra nodes is
284 * accomplished by creating a redirection from the previous suffix to the
287 * Using this shortcutting scheme, it's possible for a suffix to become out of
288 * date, which means that it is no longer the most recently seen suffix for the
289 * lcp-interval under consideration. This case is detected by noticing when the
290 * next lcp-interval link actually points deeper in the tree, and it is worked
291 * around by just continuing until we get to a link that actually takes us
292 * higher in the tree. This can be described as a lazy-update scheme.
/*
 * Advance the matchfinder by one byte and optionally report matches, using
 * the lazy-update suffix-link scheme described in the comment block above.
 * @record_matches selects between match reporting and skip-only operation;
 * it is a compile-time constant at each inline call site.
 * NOTE(review): this excerpt omits several lines (local declarations such as
 * 'ref'/'super_ref'/'match_pos'/'next', some loop/brace structure, and the
 * 'matchptr++' updates) — the visible lines are a fragment.
 */
294 static forceinline u32
295 lcpit_advance_one_byte(const u32 cur_pos,
296 u32 pos_data[restrict],
297 u32 intervals[restrict],
299 struct lz_match matches[restrict],
300 const bool record_matches)
305 struct lz_match *matchptr;
307 /* Get the deepest lcp-interval containing the current suffix. */
308 ref = pos_data[cur_pos];
310 /* Prefetch upcoming data, up to 3 positions ahead. Assume the
311 * intervals are already visited. */
313 /* Prefetch the superinterval via a suffix link for the deepest
314 * lcp-interval containing the suffix starting 1 position from now. */
315 prefetchw(&intervals[pos_data[next[0]] & POS_MASK]);
317 /* Prefetch suffix link for the deepest lcp-interval containing the
318 * suffix starting 2 positions from now. */
319 next[0] = intervals[next[1]] & POS_MASK;
320 prefetchw(&pos_data[next[0]]);
322 /* Prefetch the deepest lcp-interval containing the suffix starting 3
323 * positions from now. */
324 next[1] = pos_data[cur_pos + 3] & POS_MASK;
325 prefetchw(&intervals[next[1]]);
327 /* There is no "next suffix" after the current one. */
328 pos_data[cur_pos] = 0;
330 /* Ascend until we reach a visited interval, the root, or a child of the
331 * root. Link unvisited intervals to the current suffix as we go. */
332 while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
/* Unvisited: mark it visited by linking it to cur_pos. */
333 intervals[ref & POS_MASK] = cur_pos;
337 if (super_ref == 0) {
338 /* In this case, the current interval may be any of:
340 * (2) an unvisited child of the root;
341 * (3) an interval last visited by suffix 0
343 * We could avoid the ambiguity with (3) by using an lcp
344 * placeholder value other than 0 to represent "visited", but
345 * it's fastest to use 0. So we just don't allow matches with
348 if (ref != 0) /* Not the root? */
349 intervals[ref & POS_MASK] = cur_pos;
353 /* Ascend indirectly via pos_data[] links. */
354 match_pos = super_ref;
/* Skip outdated suffixes whose links point deeper in the tree
 * (the lazy-update case described above). */
357 while ((super_ref = pos_data[match_pos]) > ref)
358 match_pos = intervals[super_ref & POS_MASK];
359 intervals[ref & POS_MASK] = cur_pos;
/* Redirect the previous suffix's link to the skipped nodes. */
360 pos_data[match_pos] = ref;
361 if (record_matches) {
362 matchptr->length = ref >> LCP_SHIFT;
363 matchptr->offset = cur_pos - match_pos;
369 match_pos = intervals[ref & POS_MASK];
/* Number of matches written (0 when !record_matches). */
371 return matchptr - matches;
374 /* Expand SA from 32 bits to 64 bits. */
376 expand_SA(void *p, u32 n)
/* The u32 and u64 views alias the same buffer, so the (not shown)
 * conversion must proceed in an order that avoids overwriting unread
 * entries; may-alias typedefs prevent strict-aliasing UB. */
378 typedef u32 _may_alias_attribute aliased_u32_t;
379 typedef u64 _may_alias_attribute aliased_u64_t;
381 aliased_u32_t *SA = p;
382 aliased_u64_t *SA64 = p;
/* NOTE(review): the expansion loop itself is not visible in this excerpt. */
390 /* Like build_LCP(), but for buffers larger than MAX_NORMAL_BUFSIZE. */
/*
 * Same Kasai-style computation as build_LCP(), but over 64-bit entries:
 * the position lives in the low 32 bits (HUGE_POS_MASK) and the clamped
 * LCP value in the top HUGE_LCP_BITS bits.
 * NOTE(review): interior lines (declarations of 'h'/'stored_lcp', closing
 * braces) are not visible in this excerpt.
 */
392 build_LCP_huge(u64 SA_and_LCP64[restrict], const u32 ISA[restrict],
393 const u8 T[restrict], const u32 n,
394 const u32 min_lcp, const u32 max_lcp)
397 for (u32 i = 0; i < n; i++) {
398 const u32 r = ISA[i];
/* Prefetch the entry to be written PREFETCH_SAFETY iterations ahead. */
399 prefetchw(&SA_and_LCP64[ISA[i + PREFETCH_SAFETY]]);
401 const u32 j = SA_and_LCP64[r - 1] & HUGE_POS_MASK;
/* Never compare past the end of the buffer. */
402 const u32 lim = min(n - i, n - j);
403 while (h < lim && T[i + h] == T[j + h])
/* Clamp the stored LCP into the [min_lcp, max_lcp] policy. */
406 if (stored_lcp < min_lcp)
408 else if (stored_lcp > max_lcp)
409 stored_lcp = max_lcp;
410 SA_and_LCP64[r] |= (u64)stored_lcp << HUGE_LCP_SHIFT;
418 * Like build_LCPIT(), but for buffers larger than MAX_NORMAL_BUFSIZE.
420 * This "huge" version is also slightly different in that the lcp value stored
421 * in each intervals[] entry is the lcp value for that interval, not its
422 * superinterval. This lcp value stays put in intervals[] and doesn't get moved
423 * to pos_data[] during lcpit_advance_one_byte_huge(). One consequence of this
424 * is that we have to use a special flag to distinguish visited from unvisited
425 * intervals. But overall, this scheme keeps the memory usage at 12n instead of
426 * 16n. (The non-huge version is 8n.)
/*
 * Like build_LCPIT(), but over 64-bit entries.  Per the comment above, each
 * intervals64[] entry keeps its own lcp value (rather than the
 * superinterval's), so HUGE_UNVISITED_TAG (bit 32) is OR'ed in to mark
 * intervals not yet visited by the traversal phase.
 * NOTE(review): some lines ('else {' branches, inner loop header, closing
 * braces, 'prev_pos = next_pos') are not visible in this excerpt.
 */
429 build_LCPIT_huge(u64 intervals64[restrict], u32 pos_data[restrict], const u32 n)
431 u64 * const SA_and_LCP64 = intervals64;
432 u32 next_interval_idx;
/* Stack depth is bounded by HUGE_LCP_MAX due to clamping in
 * build_LCP_huge(). */
433 u32 open_intervals[HUGE_LCP_MAX + 1];
434 u32 *top = open_intervals;
435 u32 prev_pos = SA_and_LCP64[0] & HUGE_POS_MASK;
/* Interval 0 is the root; generated intervals start at index 1. */
439 next_interval_idx = 1;
441 for (u32 r = 1; r < n; r++) {
442 const u32 next_pos = SA_and_LCP64[r] & HUGE_POS_MASK;
443 const u64 next_lcp = SA_and_LCP64[r] & HUGE_LCP_MASK;
444 const u64 top_lcp = intervals64[*top];
446 prefetchw(&pos_data[SA_and_LCP64[r + PREFETCH_SAFETY] & HUGE_POS_MASK]);
448 if (next_lcp == top_lcp) {
449 /* Continuing the deepest open interval */
450 pos_data[prev_pos] = *top;
451 } else if (next_lcp > top_lcp) {
452 /* Opening a new interval */
453 intervals64[next_interval_idx] = next_lcp;
454 pos_data[prev_pos] = next_interval_idx;
455 *++top = next_interval_idx++;
457 /* Closing the deepest open interval */
458 pos_data[prev_pos] = *top;
460 const u32 closed_interval_idx = *top--;
461 const u64 superinterval_lcp = intervals64[*top];
463 if (next_lcp == superinterval_lcp) {
464 /* Continuing the superinterval */
465 intervals64[closed_interval_idx] |=
466 HUGE_UNVISITED_TAG | *top;
468 } else if (next_lcp > superinterval_lcp) {
469 /* Creating a new interval that is a
470 * superinterval of the one being
471 * closed, but still a subinterval of
472 * its superinterval */
473 intervals64[next_interval_idx] = next_lcp;
474 intervals64[closed_interval_idx] |=
475 HUGE_UNVISITED_TAG | next_interval_idx;
476 *++top = next_interval_idx++;
479 /* Also closing the superinterval */
480 intervals64[closed_interval_idx] |=
481 HUGE_UNVISITED_TAG | *top;
488 /* Close any still-open intervals. */
489 pos_data[prev_pos] = *top;
490 for (; top > open_intervals; top--)
491 intervals64[*top] |= HUGE_UNVISITED_TAG | *(top - 1);
494 /* Like lcpit_advance_one_byte(), but for buffers larger than
495 * MAX_NORMAL_BUFSIZE. */
/*
 * Same structure as lcpit_advance_one_byte(): prefetch ~3 positions ahead,
 * null the current suffix's link, ascend through unvisited intervals
 * (detected via HUGE_UNVISITED_TAG), then ascend via suffix links recording
 * one match per distinct lcp.
 * NOTE(review): several lines (declarations of 'next'/'cur'/'match_pos'/
 * 'interval_idx', the 'do {' opener of the inner loop, 'matchptr++',
 * closing braces) are not visible in this excerpt.
 */
496 static forceinline u32
497 lcpit_advance_one_byte_huge(const u32 cur_pos,
498 u32 pos_data[restrict],
499 u64 intervals64[restrict],
500 u32 prefetch_next[restrict],
501 struct lz_match matches[restrict],
502 const bool record_matches)
505 u32 next_interval_idx;
509 struct lz_match *matchptr;
/* Deepest lcp-interval containing the current suffix */
511 interval_idx = pos_data[cur_pos];
/* Prefetch pipeline, mirroring the non-huge version */
513 prefetchw(&intervals64[pos_data[prefetch_next[0]] & HUGE_POS_MASK]);
515 prefetch_next[0] = intervals64[prefetch_next[1]] & HUGE_POS_MASK;
516 prefetchw(&pos_data[prefetch_next[0]]);
518 prefetch_next[1] = pos_data[cur_pos + 3] & HUGE_POS_MASK;
519 prefetchw(&intervals64[prefetch_next[1]]);
/* There is no "next suffix" after the current one. */
521 pos_data[cur_pos] = 0;
/* Ascend through unvisited intervals, marking each as visited by
 * clearing the tag and linking it to cur_pos. */
523 while ((next = intervals64[interval_idx]) & HUGE_UNVISITED_TAG) {
524 intervals64[interval_idx] = (next & HUGE_LCP_MASK) | cur_pos;
525 interval_idx = next & HUGE_POS_MASK;
529 while (next & HUGE_LCP_MASK) {
532 match_pos = next & HUGE_POS_MASK;
533 next_interval_idx = pos_data[match_pos];
534 next = intervals64[next_interval_idx];
/* Loop while the link points deeper (outdated suffix). */
535 } while (next > cur);
536 intervals64[interval_idx] = (cur & HUGE_LCP_MASK) | cur_pos;
537 pos_data[match_pos] = interval_idx;
538 if (record_matches) {
539 matchptr->length = cur >> HUGE_LCP_SHIFT;
540 matchptr->offset = cur_pos - match_pos;
543 interval_idx = next_interval_idx;
/* Number of matches written (0 when !record_matches). */
545 return matchptr - matches;
/*
 * Size in bytes of the pos_data[] allocation: one u32 per position plus
 * PREFETCH_SAFETY padding entries, but at least DIVSUFSORT_TMP_LEN entries
 * since pos_data[] doubles as divsufsort()'s temporary space (see
 * lcpit_matchfinder_load_buffer()).  Computed in u64 to avoid overflow
 * before the caller's SIZE_MAX check.
 */
548 static forceinline u64
549 get_pos_data_size(size_t max_bufsize)
551 return (u64)max((u64)max_bufsize + PREFETCH_SAFETY,
552 DIVSUFSORT_TMP_LEN) * sizeof(u32);
/*
 * Size in bytes of the intervals[] allocation: one entry per position plus
 * PREFETCH_SAFETY padding — u32 entries in normal mode, u64 entries when
 * the buffer can exceed MAX_NORMAL_BUFSIZE (huge mode).
 */
555 static forceinline u64
556 get_intervals_size(size_t max_bufsize)
558 return ((u64)max_bufsize + PREFETCH_SAFETY) *
559 (max_bufsize <= MAX_NORMAL_BUFSIZE ? sizeof(u32) : sizeof(u64));
563 * Calculate the number of bytes of memory needed for the LCP-interval tree
566 * @max_bufsize - maximum buffer size to support
568 * Returns the number of bytes required.
571 lcpit_matchfinder_get_needed_memory(size_t max_bufsize)
/* Total = pos_data[] + intervals[]; both helpers compute in u64 so the
 * sum cannot overflow before lcpit_matchfinder_init()'s SIZE_MAX check. */
573 return get_pos_data_size(max_bufsize) + get_intervals_size(max_bufsize);
577 * Initialize the LCP-interval tree matchfinder.
579 * @mf - the matchfinder structure to initialize
580 * @max_bufsize - maximum buffer size to support
581 * @min_match_len - minimum match length in bytes
582 * @nice_match_len - only consider this many bytes of each match
584 * Returns true if successfully initialized; false if out of memory.
587 lcpit_matchfinder_init(struct lcpit_matchfinder *mf, size_t max_bufsize,
588 u32 min_match_len, u32 nice_match_len)
/* Reject sizes whose allocations would not fit in size_t, or that the
 * 32-bit huge-mode position encoding (minus prefetch padding) cannot
 * represent.  NOTE(review): the corresponding 'return false' lines are
 * not visible in this excerpt. */
590 if (lcpit_matchfinder_get_needed_memory(max_bufsize) > SIZE_MAX)
592 if (max_bufsize > MAX_HUGE_BUFSIZE - PREFETCH_SAFETY)
595 mf->pos_data = MALLOC(get_pos_data_size(max_bufsize));
596 mf->intervals = MALLOC(get_intervals_size(max_bufsize));
/* On partial failure, free whichever allocation succeeded. */
597 if (!mf->pos_data || !mf->intervals) {
598 lcpit_matchfinder_destroy(mf);
/* nice_match_len is saved as "orig" because load_buffer() re-clamps it
 * per mode (LCP_MAX or HUGE_LCP_MAX). */
602 mf->min_match_len = min_match_len;
603 mf->orig_nice_match_len = nice_match_len;
608 * Build the suffix array SA for the specified byte array T of length n.
610 * The suffix array is a sorted array of the byte array's suffixes, represented
611 * by indices into the byte array. It can equivalently be viewed as a mapping
612 * from suffix rank to suffix position.
614 * To build the suffix array, we use libdivsufsort, which uses an
615 * induced-sorting-based algorithm. In practice, this seems to be the fastest
616 * suffix array construction algorithm currently available.
620 * Y. Mori. libdivsufsort, a lightweight suffix-sorting library.
621 * https://github.com/y-256/libdivsufsort
623 * G. Nong, S. Zhang, and W.H. Chan. 2009. Linear Suffix Array
624 * Construction by Almost Pure Induced-Sorting. Data Compression
625 * Conference, 2009. DCC '09. pp. 193 - 202.
627 * S.J. Puglisi, W.F. Smyth, and A. Turpin. 2007. A Taxonomy of Suffix
628 * Array Construction Algorithms. ACM Computing Surveys (CSUR) Volume 39
629 * Issue 2, 2007 Article No. 4.
/* Build the suffix array SA over T[0..n-1] using libdivsufsort; @tmp is the
 * caller-provided temporary space (pos_data[] — see load_buffer()). */
632 build_SA(u32 SA[], const u8 T[], u32 n, u32 *tmp)
634 /* Note: divsufsort() requires a fixed amount of temporary space. The
635 * implementation of divsufsort() has been modified from the original to
636 * use the provided temporary space instead of allocating its own, since
637 * we don't want to have to deal with malloc() failures here. */
638 divsufsort(T, SA, n, tmp);
642 * Build the inverse suffix array ISA from the suffix array SA.
644 * Whereas the suffix array is a mapping from suffix rank to suffix position,
645 * the inverse suffix array is a mapping from suffix position to suffix rank.
648 build_ISA(u32 ISA[restrict], const u32 SA[restrict], u32 n)
650 for (u32 r = 0; r < n; r++)
/* NOTE(review): the loop body (presumably ISA[SA[r]] = r) is not
 * visible in this excerpt. */
655 * Prepare the LCP-interval tree matchfinder for a new input buffer.
657 * @mf - the initialized matchfinder structure
658 * @T - the input buffer
659 * @n - size of the input buffer in bytes. This must be nonzero and can be at
660 * most the max_bufsize with which lcpit_matchfinder_init() was called.
663 lcpit_matchfinder_load_buffer(struct lcpit_matchfinder *mf, const u8 *T, u32 n)
665 /* intervals[] temporarily stores SA and LCP packed together.
666 * pos_data[] temporarily stores ISA.
667 * pos_data[] is also used as the temporary space for divsufsort(). */
669 build_SA(mf->intervals, T, n, mf->pos_data);
670 build_ISA(mf->pos_data, mf->intervals, n);
/* Choose the mode based on buffer size: 32-bit entries when positions
 * fit in POS_MASK, otherwise 64-bit "huge" entries. */
671 if (n <= MAX_NORMAL_BUFSIZE) {
672 mf->nice_match_len = min(mf->orig_nice_match_len, LCP_MAX);
/* Zero the padding entries so the PREFETCH_SAFETY read-aheads in
 * the hot loops stay in bounds and read harmless data. */
673 for (u32 i = 0; i < PREFETCH_SAFETY; i++) {
674 mf->intervals[n + i] = 0;
675 mf->pos_data[n + i] = 0;
677 build_LCP(mf->intervals, mf->pos_data, T, n,
678 mf->min_match_len, mf->nice_match_len);
679 build_LCPIT(mf->intervals, mf->pos_data, n);
680 mf->huge_mode = false;
682 mf->nice_match_len = min(mf->orig_nice_match_len, HUGE_LCP_MAX);
683 for (u32 i = 0; i < PREFETCH_SAFETY; i++) {
684 mf->intervals64[n + i] = 0;
685 mf->pos_data[n + i] = 0;
/* Widen the 32-bit SA in place before building the 64-bit
 * SA_and_LCP64 representation. */
687 expand_SA(mf->intervals, n);
688 build_LCP_huge(mf->intervals64, mf->pos_data, T, n,
689 mf->min_match_len, mf->nice_match_len);
690 build_LCPIT_huge(mf->intervals64, mf->pos_data, n);
691 mf->huge_mode = true;
693 mf->cur_pos = 0; /* starting at beginning of input buffer */
/* Prime the prefetch pipeline state used by the advance functions.
 * NOTE(review): the loop body is not visible in this excerpt. */
694 for (u32 i = 0; i < ARRAY_LEN(mf->next); i++)
699 * Retrieve a list of matches with the next position.
701 * The matches will be recorded in the @matches array, ordered by strictly
702 * decreasing length and strictly decreasing offset.
704 * The return value is the number of matches found and written to @matches.
705 * This can be any value in [0, nice_match_len - min_match_len + 1].
708 lcpit_matchfinder_get_matches(struct lcpit_matchfinder *mf,
709 struct lz_match *matches)
/* Dispatch on the mode chosen by load_buffer(); both paths advance
 * cur_pos by one and record matches (record_matches == true). */
712 return lcpit_advance_one_byte_huge(mf->cur_pos++, mf->pos_data,
713 mf->intervals64, mf->next,
716 return lcpit_advance_one_byte(mf->cur_pos++, mf->pos_data,
717 mf->intervals, mf->next,
722 * Skip the next @count bytes (don't search for matches at them). @count is
726 lcpit_matchfinder_skip_bytes(struct lcpit_matchfinder *mf, u32 count)
/* Same per-byte advance as get_matches(), but with record_matches ==
 * false so the tree state is updated without reporting matches.
 * NOTE(review): the surrounding loop over @count is not visible in
 * this excerpt. */
730 lcpit_advance_one_byte_huge(mf->cur_pos++, mf->pos_data,
731 mf->intervals64, mf->next,
736 lcpit_advance_one_byte(mf->cur_pos++, mf->pos_data,
737 mf->intervals, mf->next,
744 * Destroy an LCP-interval tree matchfinder that was previously initialized with
745 * lcpit_matchfinder_init().
748 lcpit_matchfinder_destroy(struct lcpit_matchfinder *mf)