/*
 * lcpit_matchfinder.c
 *
 * A match-finder for Lempel-Ziv compression based on bottom-up construction and
 * traversal of the Longest Common Prefix (LCP) interval tree.
 *
 * The following copying information applies to this specific source code file:
 *
 * Written in 2014-2015 by Eric Biggers
 *
 * To the extent possible under law, the author(s) have dedicated all copyright
 * and related and neighboring rights to this software to the public domain
 * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
 * Dedication (the "CC0").
 *
 * This software is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
 *
 * You should have received a copy of the CC0 along with this software; if not
 * see <https://creativecommons.org/publicdomain/zero/1.0/>.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include <limits.h>

#include "wimlib/divsufsort.h"
#include "wimlib/lcpit_matchfinder.h"
#include "wimlib/util.h"

#define LCP_BITS		6
#define LCP_MAX			(((u32)1 << LCP_BITS) - 1)
#define LCP_SHIFT		(32 - LCP_BITS)
#define LCP_MASK		(LCP_MAX << LCP_SHIFT)
#define POS_MASK		(((u32)1 << (32 - LCP_BITS)) - 1)
#define MAX_NORMAL_BUFSIZE	(POS_MASK + 1)

#define HUGE_LCP_BITS		7
#define HUGE_LCP_MAX		(((u32)1 << HUGE_LCP_BITS) - 1)
#define HUGE_LCP_SHIFT		(64 - HUGE_LCP_BITS)
#define HUGE_LCP_MASK		((u64)HUGE_LCP_MAX << HUGE_LCP_SHIFT)
#define HUGE_POS_MASK		0xFFFFFFFF
#define MAX_HUGE_BUFSIZE	((u64)HUGE_POS_MASK + 1)
#define HUGE_UNVISITED_TAG	0x100000000

#define PREFETCH_SAFETY		5

/*
 * Build the LCP (Longest Common Prefix) array in linear time.
 *
 * LCP[r] will be the length of the longest common prefix between the suffixes
 * with positions SA[r - 1] and SA[r].  LCP[0] will be undefined.
 *
 * Algorithm taken from Kasai et al. (2001), but modified slightly:
 *
 *  - With bytes there is no realistic way to reserve a unique symbol for
 *    end-of-buffer, so use explicit checks for end-of-buffer.
* * - For decreased memory usage and improved memory locality, pack the two * logically distinct SA and LCP arrays into a single array SA_and_LCP. * * - Since SA_and_LCP is accessed randomly, improve the cache behavior by * reading several entries ahead in ISA and prefetching the upcoming * SA_and_LCP entry. * * - If an LCP value is less than the minimum match length, then store 0. This * avoids having to do comparisons against the minimum match length later. * * - If an LCP value is greater than the "nice match length", then store the * "nice match length". This caps the number of bits needed to store each * LCP value, and this caps the depth of the LCP-interval tree, without * usually hurting the compression ratio too much. * * References: * * Kasai et al. 2001. Linear-Time Longest-Common-Prefix Computation in * Suffix Arrays and Its Applications. CPM '01 Proceedings of the 12th * Annual Symposium on Combinatorial Pattern Matching pp. 181-192. */ static void build_LCP(u32 SA_and_LCP[restrict], const u32 ISA[restrict], const u8 T[restrict], const u32 n, const u32 min_lcp, const u32 max_lcp) { u32 h = 0; for (u32 i = 0; i < n; i++) { const u32 r = ISA[i]; prefetchw(&SA_and_LCP[ISA[i + PREFETCH_SAFETY]]); if (r > 0) { const u32 j = SA_and_LCP[r - 1] & POS_MASK; const u32 lim = min(n - i, n - j); while (h < lim && T[i + h] == T[j + h]) h++; u32 stored_lcp = h; if (stored_lcp < min_lcp) stored_lcp = 0; else if (stored_lcp > max_lcp) stored_lcp = max_lcp; SA_and_LCP[r] |= stored_lcp << LCP_SHIFT; if (h > 0) h--; } } } /* * Use the suffix array accompanied with the longest-common-prefix array --- * which in combination can be called the "enhanced suffix array" --- to * simulate a bottom-up traversal of the corresponding suffix tree, or * equivalently the lcp-interval tree. Do so in suffix rank order, but save the * superinterval references needed for later bottom-up traversal of the tree in * suffix position order. 
* * To enumerate the lcp-intervals, this algorithm scans the suffix array and its * corresponding LCP array linearly. While doing so, it maintains a stack of * lcp-intervals that are currently open, meaning that their left boundaries * have been seen but their right boundaries have not. The bottom of the stack * is the interval which covers the entire suffix array (this has lcp=0), and * the top of the stack is the deepest interval that is currently open (this has * the greatest lcp of any interval on the stack). When this algorithm opens an * lcp-interval, it assigns it a unique index in intervals[] and pushes it onto * the stack. When this algorithm closes an interval, it pops it from the stack * and sets the intervals[] entry of that interval to the index and lcp of that * interval's superinterval, which is the new top of the stack. * * This algorithm also set pos_data[pos] for each suffix position 'pos' to the * index and lcp of the deepest lcp-interval containing it. Alternatively, we * can interpret each suffix as being associated with a singleton lcp-interval, * or leaf of the suffix tree. With this interpretation, an entry in pos_data[] * is the superinterval reference for one of these singleton lcp-intervals and * therefore is not fundamentally different from an entry in intervals[]. * * To reduce memory usage, this algorithm re-uses the suffix array's memory to * store the generated intervals[] array. This is possible because SA and LCP * are accessed linearly, and no more than one interval is generated per suffix. * * The techniques used in this algorithm are described in various published * papers. The generation of lcp-intervals from the suffix array (SA) and the * longest-common-prefix array (LCP) is given as Algorithm BottomUpTraverse in * Kasai et al. (2001) and Algorithm 4.1 ("Computation of lcp-intervals") in * Abouelhoda et al. (2004). 
Both these papers note the equivalence between * lcp-intervals (including the singleton lcp-interval for each suffix) and * nodes of the suffix tree. Abouelhoda et al. (2004) furthermore applies * bottom-up traversal of the lcp-interval tree to Lempel-Ziv factorization, as * does Chen at al. (2008). Algorithm CPS1b of Chen et al. (2008) dynamically * re-uses the suffix array during bottom-up traversal of the lcp-interval tree. * * References: * * Kasai et al. Linear-Time Longest-Common-Prefix Computation in Suffix * Arrays and Its Applications. 2001. CPM '01 Proceedings of the 12th * Annual Symposium on Combinatorial Pattern Matching pp. 181-192. * * M.I. Abouelhoda, S. Kurtz, E. Ohlebusch. 2004. Replacing Suffix Trees * With Enhanced Suffix Arrays. Journal of Discrete Algorithms Volume 2 * Issue 1, March 2004, pp. 53-86. * * G. Chen, S.J. Puglisi, W.F. Smyth. 2008. Lempel-Ziv Factorization * Using Less Time & Space. Mathematics in Computer Science June 2008, * Volume 1, Issue 4, pp. 605-623. 
*/ static void build_LCPIT(u32 intervals[restrict], u32 pos_data[restrict], const u32 n) { u32 * const SA_and_LCP = intervals; u32 next_interval_idx; u32 open_intervals[LCP_MAX + 1]; u32 *top = open_intervals; u32 prev_pos = SA_and_LCP[0] & POS_MASK; *top = 0; intervals[0] = 0; next_interval_idx = 1; for (u32 r = 1; r < n; r++) { const u32 next_pos = SA_and_LCP[r] & POS_MASK; const u32 next_lcp = SA_and_LCP[r] & LCP_MASK; const u32 top_lcp = *top & LCP_MASK; prefetchw(&pos_data[SA_and_LCP[r + PREFETCH_SAFETY] & POS_MASK]); if (next_lcp == top_lcp) { /* Continuing the deepest open interval */ pos_data[prev_pos] = *top; } else if (next_lcp > top_lcp) { /* Opening a new interval */ *++top = next_lcp | next_interval_idx++; pos_data[prev_pos] = *top; } else { /* Closing the deepest open interval */ pos_data[prev_pos] = *top; for (;;) { const u32 closed_interval_idx = *top-- & POS_MASK; const u32 superinterval_lcp = *top & LCP_MASK; if (next_lcp == superinterval_lcp) { /* Continuing the superinterval */ intervals[closed_interval_idx] = *top; break; } else if (next_lcp > superinterval_lcp) { /* Creating a new interval that is a * superinterval of the one being * closed, but still a subinterval of * its superinterval */ *++top = next_lcp | next_interval_idx++; intervals[closed_interval_idx] = *top; break; } else { /* Also closing the superinterval */ intervals[closed_interval_idx] = *top; } } } prev_pos = next_pos; } /* Close any still-open intervals. */ pos_data[prev_pos] = *top; for (; top > open_intervals; top--) intervals[*top & POS_MASK] = *(top - 1); } /* * Advance the LCP-interval tree matchfinder by one byte. * * If @record_matches is true, then matches are written to the @matches array * sorted by strictly decreasing length and strictly decreasing offset, and the * return value is the number of matches found. Otherwise, @matches is ignored * and the return value is always 0. 
* * How this algorithm works: * * 'cur_pos' is the position of the current suffix, which is the suffix being * matched against. 'cur_pos' starts at 0 and is incremented each time this * function is called. This function finds each suffix with position less than * 'cur_pos' that shares a prefix with the current suffix, but for each distinct * prefix length it finds only the suffix with the greatest position (i.e. the * most recently seen in the linear traversal by position). This function * accomplishes this using the lcp-interval tree data structure that was built * by build_LCPIT() and is updated dynamically by this function. * * The first step is to read 'pos_data[cur_pos]', which gives the index and lcp * value of the deepest lcp-interval containing the current suffix --- or, * equivalently, the parent of the conceptual singleton lcp-interval that * contains the current suffix. * * The second step is to ascend the lcp-interval tree until reaching an interval * that has not yet been visited, and link the intervals to the current suffix * along the way. An lcp-interval has been visited if and only if it has been * linked to a suffix. Initially, no lcp-intervals are linked to suffixes. * * The third step is to continue ascending the lcp-interval tree, but indirectly * through suffix links rather than through the original superinterval * references, and continuing to form links with the current suffix along the * way. Each suffix visited during this step, except in a special case to * handle outdated suffixes, is a match which can be written to matches[]. Each * intervals[] entry contains the position of the next suffix to visit, which we * shall call 'match_pos'; this is the most recently seen suffix that belongs to * that lcp-interval. 'pos_data[match_pos]' then contains the lcp and interval * index of the next lcp-interval that should be visited. 
* * We can view these arrays as describing a new set of links that gets overlaid * on top of the original superinterval references of the lcp-interval tree. * Each such link can connect a node of the lcp-interval tree to an ancestor * more than one generation removed. * * For each one-byte advance, the current position becomes the most recently * seen suffix for a continuous sequence of lcp-intervals from a leaf interval * to the root interval. Conceptually, this algorithm needs to update all these * nodes to link to 'cur_pos', and then update 'pos_data[cur_pos]' to a "null" * link. But actually, if some of these nodes have the same most recently seen * suffix, then this algorithm just visits the pos_data[] entry for that suffix * and skips over all these nodes in one step. Updating the extra nodes is * accomplished by creating a redirection from the previous suffix to the * current suffix. * * Using this shortcutting scheme, it's possible for a suffix to become out of * date, which means that it is no longer the most recently seen suffix for the * lcp-interval under consideration. This case is detected by noticing when the * next lcp-interval link actually points deeper in the tree, and it is worked * around by just continuing until we get to a link that actually takes us * higher in the tree. This can be described as a lazy-update scheme. */ static forceinline u32 lcpit_advance_one_byte(const u32 cur_pos, u32 pos_data[restrict], u32 intervals[restrict], u32 next[restrict], struct lz_match matches[restrict], const bool record_matches) { u32 ref; u32 super_ref; u32 match_pos; struct lz_match *matchptr; /* Get the deepest lcp-interval containing the current suffix. */ ref = pos_data[cur_pos]; /* Prefetch upcoming data, up to 3 positions ahead. Assume the * intervals are already visited. */ /* Prefetch the superinterval via a suffix link for the deepest * lcp-interval containing the suffix starting 1 position from now. 
*/ prefetchw(&intervals[pos_data[next[0]] & POS_MASK]); /* Prefetch suffix link for the deepest lcp-interval containing the * suffix starting 2 positions from now. */ next[0] = intervals[next[1]] & POS_MASK; prefetchw(&pos_data[next[0]]); /* Prefetch the deepest lcp-interval containing the suffix starting 3 * positions from now. */ next[1] = pos_data[cur_pos + 3] & POS_MASK; prefetchw(&intervals[next[1]]); /* There is no "next suffix" after the current one. */ pos_data[cur_pos] = 0; /* Ascend until we reach a visited interval, the root, or a child of the * root. Link unvisited intervals to the current suffix as we go. */ while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) { intervals[ref & POS_MASK] = cur_pos; ref = super_ref; } if (super_ref == 0) { /* In this case, the current interval may be any of: * (1) the root; * (2) an unvisited child of the root; * (3) an interval last visited by suffix 0 * * We could avoid the ambiguity with (3) by using an lcp * placeholder value other than 0 to represent "visited", but * it's fastest to use 0. So we just don't allow matches with * position 0. */ if (ref != 0) /* Not the root? */ intervals[ref & POS_MASK] = cur_pos; return 0; } /* Ascend indirectly via pos_data[] links. */ match_pos = super_ref; matchptr = matches; for (;;) { while ((super_ref = pos_data[match_pos]) > ref) match_pos = intervals[super_ref & POS_MASK]; intervals[ref & POS_MASK] = cur_pos; pos_data[match_pos] = ref; if (record_matches) { matchptr->length = ref >> LCP_SHIFT; matchptr->offset = cur_pos - match_pos; matchptr++; } if (super_ref == 0) break; ref = super_ref; match_pos = intervals[ref & POS_MASK]; } return matchptr - matches; } /* Expand SA from 32 bits to 64 bits. 
*/ static void expand_SA(void *p, u32 n) { typedef u32 _may_alias_attribute aliased_u32_t; typedef u64 _may_alias_attribute aliased_u64_t; aliased_u32_t *SA = p; aliased_u64_t *SA64 = p; u32 r = n - 1; do { SA64[r] = SA[r]; } while (r--); } /* Like build_LCP(), but for buffers larger than MAX_NORMAL_BUFSIZE. */ static void build_LCP_huge(u64 SA_and_LCP64[restrict], const u32 ISA[restrict], const u8 T[restrict], const u32 n, const u32 min_lcp, const u32 max_lcp) { u32 h = 0; for (u32 i = 0; i < n; i++) { const u32 r = ISA[i]; prefetchw(&SA_and_LCP64[ISA[i + PREFETCH_SAFETY]]); if (r > 0) { const u32 j = SA_and_LCP64[r - 1] & HUGE_POS_MASK; const u32 lim = min(n - i, n - j); while (h < lim && T[i + h] == T[j + h]) h++; u32 stored_lcp = h; if (stored_lcp < min_lcp) stored_lcp = 0; else if (stored_lcp > max_lcp) stored_lcp = max_lcp; SA_and_LCP64[r] |= (u64)stored_lcp << HUGE_LCP_SHIFT; if (h > 0) h--; } } } /* * Like build_LCPIT(), but for buffers larger than MAX_NORMAL_BUFSIZE. * * This "huge" version is also slightly different in that the lcp value stored * in each intervals[] entry is the lcp value for that interval, not its * superinterval. This lcp value stays put in intervals[] and doesn't get moved * to pos_data[] during lcpit_advance_one_byte_huge(). One consequence of this * is that we have to use a special flag to distinguish visited from unvisited * intervals. But overall, this scheme keeps the memory usage at 12n instead of * 16n. (The non-huge version is 8n.) 
*/ static void build_LCPIT_huge(u64 intervals64[restrict], u32 pos_data[restrict], const u32 n) { u64 * const SA_and_LCP64 = intervals64; u32 next_interval_idx; u32 open_intervals[HUGE_LCP_MAX + 1]; u32 *top = open_intervals; u32 prev_pos = SA_and_LCP64[0] & HUGE_POS_MASK; *top = 0; intervals64[0] = 0; next_interval_idx = 1; for (u32 r = 1; r < n; r++) { const u32 next_pos = SA_and_LCP64[r] & HUGE_POS_MASK; const u64 next_lcp = SA_and_LCP64[r] & HUGE_LCP_MASK; const u64 top_lcp = intervals64[*top]; prefetchw(&pos_data[SA_and_LCP64[r + PREFETCH_SAFETY] & HUGE_POS_MASK]); if (next_lcp == top_lcp) { /* Continuing the deepest open interval */ pos_data[prev_pos] = *top; } else if (next_lcp > top_lcp) { /* Opening a new interval */ intervals64[next_interval_idx] = next_lcp; pos_data[prev_pos] = next_interval_idx; *++top = next_interval_idx++; } else { /* Closing the deepest open interval */ pos_data[prev_pos] = *top; for (;;) { const u32 closed_interval_idx = *top--; const u64 superinterval_lcp = intervals64[*top]; if (next_lcp == superinterval_lcp) { /* Continuing the superinterval */ intervals64[closed_interval_idx] |= HUGE_UNVISITED_TAG | *top; break; } else if (next_lcp > superinterval_lcp) { /* Creating a new interval that is a * superinterval of the one being * closed, but still a subinterval of * its superinterval */ intervals64[next_interval_idx] = next_lcp; intervals64[closed_interval_idx] |= HUGE_UNVISITED_TAG | next_interval_idx; *++top = next_interval_idx++; break; } else { /* Also closing the superinterval */ intervals64[closed_interval_idx] |= HUGE_UNVISITED_TAG | *top; } } } prev_pos = next_pos; } /* Close any still-open intervals. */ pos_data[prev_pos] = *top; for (; top > open_intervals; top--) intervals64[*top] |= HUGE_UNVISITED_TAG | *(top - 1); } /* Like lcpit_advance_one_byte(), but for buffers larger than * MAX_NORMAL_BUFSIZE. 
*/ static forceinline u32 lcpit_advance_one_byte_huge(const u32 cur_pos, u32 pos_data[restrict], u64 intervals64[restrict], u32 prefetch_next[restrict], struct lz_match matches[restrict], const bool record_matches) { u32 interval_idx; u32 next_interval_idx; u64 cur; u64 next; u32 match_pos; struct lz_match *matchptr; interval_idx = pos_data[cur_pos]; prefetchw(&intervals64[pos_data[prefetch_next[0]] & HUGE_POS_MASK]); prefetch_next[0] = intervals64[prefetch_next[1]] & HUGE_POS_MASK; prefetchw(&pos_data[prefetch_next[0]]); prefetch_next[1] = pos_data[cur_pos + 3] & HUGE_POS_MASK; prefetchw(&intervals64[prefetch_next[1]]); pos_data[cur_pos] = 0; while ((next = intervals64[interval_idx]) & HUGE_UNVISITED_TAG) { intervals64[interval_idx] = (next & HUGE_LCP_MASK) | cur_pos; interval_idx = next & HUGE_POS_MASK; } matchptr = matches; while (next & HUGE_LCP_MASK) { cur = next; do { match_pos = next & HUGE_POS_MASK; next_interval_idx = pos_data[match_pos]; next = intervals64[next_interval_idx]; } while (next > cur); intervals64[interval_idx] = (cur & HUGE_LCP_MASK) | cur_pos; pos_data[match_pos] = interval_idx; if (record_matches) { matchptr->length = cur >> HUGE_LCP_SHIFT; matchptr->offset = cur_pos - match_pos; matchptr++; } interval_idx = next_interval_idx; } return matchptr - matches; } static forceinline u64 get_pos_data_size(size_t max_bufsize) { return (u64)max((u64)max_bufsize + PREFETCH_SAFETY, DIVSUFSORT_TMP_LEN) * sizeof(u32); } static forceinline u64 get_intervals_size(size_t max_bufsize) { return ((u64)max_bufsize + PREFETCH_SAFETY) * (max_bufsize <= MAX_NORMAL_BUFSIZE ? sizeof(u32) : sizeof(u64)); } /* * Calculate the number of bytes of memory needed for the LCP-interval tree * matchfinder. * * @max_bufsize - maximum buffer size to support * * Returns the number of bytes required. 
*/ u64 lcpit_matchfinder_get_needed_memory(size_t max_bufsize) { return get_pos_data_size(max_bufsize) + get_intervals_size(max_bufsize); } /* * Initialize the LCP-interval tree matchfinder. * * @mf - the matchfinder structure to initialize * @max_bufsize - maximum buffer size to support * @min_match_len - minimum match length in bytes * @nice_match_len - only consider this many bytes of each match * * Returns true if successfully initialized; false if out of memory. */ bool lcpit_matchfinder_init(struct lcpit_matchfinder *mf, size_t max_bufsize, u32 min_match_len, u32 nice_match_len) { if (lcpit_matchfinder_get_needed_memory(max_bufsize) > SIZE_MAX) return false; if (max_bufsize > MAX_HUGE_BUFSIZE - PREFETCH_SAFETY) return false; mf->pos_data = MALLOC(get_pos_data_size(max_bufsize)); mf->intervals = MALLOC(get_intervals_size(max_bufsize)); if (!mf->pos_data || !mf->intervals) { lcpit_matchfinder_destroy(mf); return false; } mf->min_match_len = min_match_len; mf->nice_match_len = min(nice_match_len, (max_bufsize <= MAX_NORMAL_BUFSIZE) ? LCP_MAX : HUGE_LCP_MAX); return true; } /* * Build the suffix array SA for the specified byte array T of length n. * * The suffix array is a sorted array of the byte array's suffixes, represented * by indices into the byte array. It can equivalently be viewed as a mapping * from suffix rank to suffix position. * * To build the suffix array, we use libdivsufsort, which uses an * induced-sorting-based algorithm. In practice, this seems to be the fastest * suffix array construction algorithm currently available. * * References: * * Y. Mori. libdivsufsort, a lightweight suffix-sorting library. * https://code.google.com/p/libdivsufsort/. * * G. Nong, S. Zhang, and W.H. Chan. 2009. Linear Suffix Array * Construction by Almost Pure Induced-Sorting. Data Compression * Conference, 2009. DCC '09. pp. 193 - 202. * * S.J. Puglisi, W.F. Smyth, and A. Turpin. 2007. A Taxonomy of Suffix * Array Construction Algorithms. 
ACM Computing Surveys (CSUR) Volume 39 * Issue 2, 2007 Article No. 4. */ static void build_SA(u32 SA[], const u8 T[], u32 n, u32 *tmp) { /* Note: divsufsort() requires a fixed amount of temporary space. The * implementation of divsufsort() has been modified from the original to * use the provided temporary space instead of allocating its own, since * we don't want to have to deal with malloc() failures here. */ divsufsort(T, SA, n, tmp); } /* * Build the inverse suffix array ISA from the suffix array SA. * * Whereas the suffix array is a mapping from suffix rank to suffix position, * the inverse suffix array is a mapping from suffix position to suffix rank. */ static void build_ISA(u32 ISA[restrict], const u32 SA[restrict], u32 n) { for (u32 r = 0; r < n; r++) ISA[SA[r]] = r; } /* * Prepare the LCP-interval tree matchfinder for a new input buffer. * * @mf - the initialized matchfinder structure * @T - the input buffer * @n - size of the input buffer in bytes. This must be nonzero and can be at * most the max_bufsize with which lcpit_matchfinder_init() was called. */ void lcpit_matchfinder_load_buffer(struct lcpit_matchfinder *mf, const u8 *T, u32 n) { /* intervals[] temporarily stores SA and LCP packed together. * pos_data[] temporarily stores ISA. * pos_data[] is also used as the temporary space for divsufsort(). 
*/ build_SA(mf->intervals, T, n, mf->pos_data); build_ISA(mf->pos_data, mf->intervals, n); if (n <= MAX_NORMAL_BUFSIZE) { for (u32 i = 0; i < PREFETCH_SAFETY; i++) { mf->intervals[n + i] = 0; mf->pos_data[n + i] = 0; } build_LCP(mf->intervals, mf->pos_data, T, n, mf->min_match_len, mf->nice_match_len); build_LCPIT(mf->intervals, mf->pos_data, n); mf->huge_mode = false; } else { for (u32 i = 0; i < PREFETCH_SAFETY; i++) { mf->intervals64[n + i] = 0; mf->pos_data[n + i] = 0; } expand_SA(mf->intervals, n); build_LCP_huge(mf->intervals64, mf->pos_data, T, n, mf->min_match_len, mf->nice_match_len); build_LCPIT_huge(mf->intervals64, mf->pos_data, n); mf->huge_mode = true; } mf->cur_pos = 0; /* starting at beginning of input buffer */ for (u32 i = 0; i < ARRAY_LEN(mf->next); i++) mf->next[i] = 0; } /* * Retrieve a list of matches with the next position. * * The matches will be recorded in the @matches array, ordered by strictly * decreasing length and strictly decreasing offset. * * The return value is the number of matches found and written to @matches. * This can be any value in [0, nice_match_len - min_match_len + 1]. */ u32 lcpit_matchfinder_get_matches(struct lcpit_matchfinder *mf, struct lz_match *matches) { if (mf->huge_mode) return lcpit_advance_one_byte_huge(mf->cur_pos++, mf->pos_data, mf->intervals64, mf->next, matches, true); else return lcpit_advance_one_byte(mf->cur_pos++, mf->pos_data, mf->intervals, mf->next, matches, true); } /* * Skip the next @count bytes (don't search for matches at them). @count is * assumed to be > 0. 
*/ void lcpit_matchfinder_skip_bytes(struct lcpit_matchfinder *mf, u32 count) { if (mf->huge_mode) { do { lcpit_advance_one_byte_huge(mf->cur_pos++, mf->pos_data, mf->intervals64, mf->next, NULL, false); } while (--count); } else { do { lcpit_advance_one_byte(mf->cur_pos++, mf->pos_data, mf->intervals, mf->next, NULL, false); } while (--count); } } /* * Destroy an LCP-interval tree matchfinder that was previously initialized with * lcpit_matchfinder_init(). */ void lcpit_matchfinder_destroy(struct lcpit_matchfinder *mf) { FREE(mf->pos_data); FREE(mf->intervals); }