--- /dev/null
+/*
+ * lz_linked_suffix_array.c
+ *
+ * Linked suffix array match-finder for Lempel-Ziv compression.
+ *
+ * Copyright (c) 2013, 2014 Eric Biggers. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "wimlib/lz_mf.h"
+#include "wimlib/lz_suffix_array_utils.h"
+#include "wimlib/util.h"
+
+struct salink;
+
+/* Length type --- must be an unsigned type large enough to hold the maximum
+ * match length. */
+typedef u16 lz_lsa_len_t;
+
+/* Type of distances in suffix array links. A larger type would allow skipping
+ * irrelevant suffixes more quickly, which is especially helpful towards the
+ * start of the window. However, even a single byte allows skipping 255 at a
+ * time, which where it matters is already a big improvement over the
+ * alternative of searching the suffixes consecutively. */
+typedef u8 lz_lsa_delta_t;
+
+#define LZ_LSA_LEN_MAX ((lz_lsa_len_t)~0UL)
+#define LZ_LSA_POS_MAX ((u32)~0UL)
+#define LZ_LSA_DELTA_MAX ((lz_lsa_delta_t)~0UL)
+
+/* State of the linked suffix array match-finder. */
+struct lz_lsa {
+
+ struct lz_mf base;
+
+ /* Suffix array for the current window.
+ * This is a mapping from suffix rank to suffix position. */
+ u32 *SA;
+
+ /* Inverse suffix array for the current window.
+ * This is a mapping from suffix position to suffix rank.
+ * If 0 <= r < window_size, then ISA[SA[r]] == r. */
+ u32 *ISA;
+
+ /* Suffix array links.
+ *
+ * During a linear scan of the input string to find matches, this array
+ * used to keep track of which rank suffixes in the suffix array appear
+ * before the current position. Instead of searching in the original
+ * suffix array, scans for matches at a given position traverse a linked
+ * list containing (usually) only suffixes that appear before that
+ * position. */
+ struct salink *salink;
+};
+
+/* Suffix array link. An array of these structures, one per suffix rank, is
+ * used as a replacement for the raw LCP (Longest Common Prefix) array to allow
+ * skipping over suffixes that appear later in the window and hence cannot be
+ * used as LZ77 matches. */
+struct salink {
+ union {
+ /* Temporary fields used while this structure is being
+ * initialized.
+ *
+ * Note: we want the entire `struct salink' to be only 6 bytes,
+ * even though this makes "next_initial" unaligned. */
+ struct {
+ u32 next_initial;
+ lz_lsa_len_t lcpnext_initial;
+ } _packed_attribute;
+
+ struct {
+ /* Intially, the length, in bytes, of the longest common
+ * prefix (LCP) between the suffix having this rank and
+ * the suffix with the smallest larger rank that
+ * starts earlier in the window than the suffix having
+ * this rank. If no such suffix exists, this will be 0.
+ *
+ * Later, during match-finding, after the corresponding
+ * suffix has entered the LZ77 dictionary, this value
+ * may be updated by lz_lsa_update_salink() to refer
+ * instead to a lexicographically closer (but still
+ * larger) suffix that begins at a later position that
+ * has entered the LZ77 dictionary. */
+ lz_lsa_len_t lcpnext;
+
+ /* Initially, the length, in bytes, of the longest
+ * common prefix (LCP) between the suffix having this
+ * rank and the suffix with the largest smaller rank
+ * that starts earlier in the window than the suffix
+ * having this rank. If no such suffix exists, this
+ * will be 0.
+ *
+ * Later, during match-finding, after the corresponding
+ * suffix has entered the LZ77 dictionary, this value
+ * may be updated by lz_lsa_update_salink() to refer
+ * instead to a lexicographically closer (but still
+ * smaller) suffix that begins at a later position that
+ * has entered the LZ77 dictionary. */
+ lz_lsa_len_t lcpprev;
+
+ /* Distance to the suffix referred to in the description
+ * of "lcpnext" above, but capped to a maximum value to
+ * save memory; or, 0 if no such suffix exists. If the
+ * true distance was truncated, this will give the
+ * distance to the rank of a suffix that is
+ * lexicographically closer to the current suffix than
+ * the desired suffix, but appears *later* in the window
+ * and hence cannot be used as the basis for an LZ77
+ * match. */
+ lz_lsa_delta_t dist_to_next;
+
+ /* Distance to the suffix referred to in the description
+ * of "lcpprev" above, but capped to a maximum value to
+ * save memory; or, 0 if no such suffix exists. If the
+ * true distance was truncated, this will give the
+ * distance to the rank of a suffix that is
+ * lexicographically closer to the current suffix than
+ * the desired suffix, but appears *later* in the window
+ * and hence cannot be used as the basis for an LZ77
+ * match. */
+ lz_lsa_delta_t dist_to_prev;
+ };
+ };
+};
+
+/* Initialize the SA link array in linear time.
+ *
+ * This is similar to computing the LPF (Longest Previous Factor) array, which
+ * is addressed in several papers. In particular the algorithms below are based
+ * on Crochemore et al. 2009: "LPF computation revisited". However, this
+ * match-finder does not actually compute or use the LPF array per se. Rather,
+ * this function sets up some information necessary to compute the LPF array,
+ * but later lz_lsa_get_matches() actually uses this information to search
+ * the suffix array directly and can keep searching beyond the first (longest)
+ * match whose length would be placed in the LPF array. This difference from
+ * the theoretical work is necessary because in many real compression formats
+ * matches take variable numbers of bits to encode, so a decent parser needs to
+ * consider more than just the longest match with unspecified offset.
+ *
+ * Note: We cap the lcpprev and lcpnext values to the maximum match length so
+ * that the match-finder need not worry about it later, in the inner loop.
+ *
+ * Note: the LCP array is one of the inputs to this function, but it is used as
+ * temporary space and therefore will be invalidated.
+ */
+static void
+init_salink(struct salink link[restrict], u32 LCP[restrict],
+ const u32 SA[restrict], const u8 T[restrict], u32 n,
+ lz_lsa_len_t min_match_len, lz_lsa_len_t max_match_len)
+{
+ /* Calculate salink.dist_to_next and salink.lcpnext.
+ *
+ * Pass 1 calculates, for each suffix rank, the corresponding
+ * "next_initial" value which is the smallest larger rank that
+ * corresponds to a suffix starting earlier in the string. It also
+ * calculates "lcpnext_initial", which is the longest common prefix with
+ * that suffix, although to eliminate checks in lz_lsa_get_matches(),
+ * "lcpnext_initial" is set to 0 if it's less than the minimum match
+ * length or set to the maximum match length if it's greater than the
+ * maximum match length.
+ *
+ * Pass 2 translates each absolute "next_initial", a 4-byte value, into
+ * a relative "dist_to_next", a 1-byte value. This is done to save
+ * memory. In the case that the exact relative distance cannot be
+ * encoded in 1 byte, it is capped to 255. This is valid as long as
+ * lz_lsa_get_matches() validates each position before using it.
+ * Note that "lcpnext" need not be updated in this case because it will
+ * not be used until the actual next rank has been found anyway.
+ */
+ link[n - 1].next_initial = LZ_LSA_POS_MAX;
+ link[n - 1].lcpnext_initial = 0;
+ for (u32 r = n - 2; r != LZ_LSA_POS_MAX; r--) {
+ u32 t = r + 1;
+ u32 l = LCP[t];
+ while (t != LZ_LSA_POS_MAX && SA[t] > SA[r]) {
+ l = min(l, link[t].lcpnext_initial);
+ t = link[t].next_initial;
+ }
+ link[r].next_initial = t;
+
+ if (l < min_match_len)
+ l = 0;
+ else if (l > max_match_len)
+ l = max_match_len;
+ link[r].lcpnext_initial = l;
+ }
+ for (u32 r = 0; r < n; r++) {
+ u32 next;
+ lz_lsa_len_t l;
+ lz_lsa_delta_t dist_to_next;
+
+ next = link[r].next_initial;
+ l = link[r].lcpnext_initial;
+
+ if (next == LZ_LSA_POS_MAX)
+ dist_to_next = 0;
+ else if (next - r <= LZ_LSA_DELTA_MAX)
+ dist_to_next = next - r;
+ else
+ dist_to_next = LZ_LSA_DELTA_MAX;
+
+ link[r].lcpnext = l;
+ link[r].dist_to_next = dist_to_next;
+ }
+
+ /* Calculate salink.dist_to_prev and salink.lcpprev.
+ *
+ * This is analgous to dist_to_next and lcpnext as described above, but
+ * in the other direction. That is, here we're interested in, for each
+ * rank, the largest smaller rank that corresponds to a suffix starting
+ * earlier in the string.
+ *
+ * To save memory we don't have a "prev_initial" field, but rather store
+ * those values in the LCP array. */
+ LCP[0] = LZ_LSA_POS_MAX;
+ link[0].lcpprev = 0;
+ for (u32 r = 1; r < n; r++) {
+ u32 t = r - 1;
+ u32 l = LCP[r];
+ while (t != LZ_LSA_POS_MAX && SA[t] > SA[r]) {
+ l = min(l, link[t].lcpprev);
+ t = LCP[t];
+ }
+ LCP[r] = t;
+
+ if (l < min_match_len)
+ l = 0;
+ else if (l > max_match_len)
+ l = max_match_len;
+
+ link[r].lcpprev = l;
+ }
+ for (u32 r = 0; r < n; r++) {
+
+ u32 prev = LCP[r];
+
+ if (prev == LZ_LSA_POS_MAX)
+ link[r].dist_to_prev = 0;
+ else if (r - prev <= LZ_LSA_DELTA_MAX)
+ link[r].dist_to_prev = r - prev;
+ else
+ link[r].dist_to_prev = LZ_LSA_DELTA_MAX;
+ }
+}
+
+/* If ENABLE_LZ_DEBUG is defined, verify the values computed by init_salink().
+ *
+ * WARNING: this is for debug use only as it does not necessarily run in linear
+ * time!!! */
+static void
+verify_salink(const struct salink link[], const u32 SA[], const u8 T[], u32 n,
+ lz_lsa_len_t min_match_len, lz_lsa_len_t max_match_len)
+{
+#ifdef ENABLE_LZ_DEBUG
+ for (u32 r = 0; r < n; r++) {
+ for (u32 prev = r; ; ) {
+ if (prev == 0) {
+ LZ_ASSERT(link[r].dist_to_prev == 0);
+ LZ_ASSERT(link[r].lcpprev == 0);
+ break;
+ }
+
+ prev--;
+
+ if (SA[prev] < SA[r]) {
+ LZ_ASSERT(link[r].dist_to_prev == min(r - prev, LZ_LSA_DELTA_MAX));
+
+ u32 lcpprev;
+ for (lcpprev = 0;
+ lcpprev < min(n - SA[prev], n - SA[r]) &&
+ T[SA[prev] + lcpprev] == T[SA[r] + lcpprev];
+ lcpprev++)
+ ;
+ if (lcpprev < min_match_len)
+ lcpprev = 0;
+ else if (lcpprev > max_match_len)
+ lcpprev = max_match_len;
+
+ LZ_ASSERT(lcpprev == link[r].lcpprev);
+ break;
+ }
+ }
+
+ for (u32 next = r; ; ) {
+ if (next == n - 1) {
+ LZ_ASSERT(link[r].dist_to_next == 0);
+ LZ_ASSERT(link[r].lcpnext == 0);
+ break;
+ }
+
+ next++;
+
+ if (SA[next] < SA[r]) {
+ LZ_ASSERT(link[r].dist_to_next == min(next - r, LZ_LSA_DELTA_MAX));
+
+ u32 lcpnext;
+ for (lcpnext = 0;
+ lcpnext < min(n - SA[next], n - SA[r]) &&
+ T[SA[next] + lcpnext] == T[SA[r] + lcpnext];
+ lcpnext++)
+ ;
+ if (lcpnext < min_match_len)
+ lcpnext = 0;
+ else if (lcpnext > max_match_len)
+ lcpnext = max_match_len;
+
+ LZ_ASSERT(lcpnext == link[r].lcpnext);
+ break;
+ }
+ }
+ }
+#endif
+}
+
+static inline void
+lz_lsa_update_salink(const u32 r, struct salink link[])
+{
+ const u32 next = r + link[r].dist_to_next;
+ const u32 prev = r - link[r].dist_to_prev;
+
+ if (next != r && link[r].dist_to_next < link[next].dist_to_prev) {
+ link[next].dist_to_prev = link[r].dist_to_next;
+ link[next].lcpprev = link[r].lcpnext;
+ }
+
+ if (prev != r && link[r].dist_to_prev < link[prev].dist_to_next) {
+ link[prev].dist_to_next = link[r].dist_to_prev;
+ link[prev].lcpnext = link[r].lcpprev;
+ }
+}
+
+static void
+lz_lsa_set_default_params(struct lz_mf_params *params)
+{
+ if (params->min_match_len == 0)
+ params->min_match_len = 2;
+
+ if (params->max_match_len == 0)
+ params->max_match_len = params->max_window_size;
+
+ if (params->max_match_len > LZ_LSA_LEN_MAX)
+ params->max_match_len = LZ_LSA_LEN_MAX;
+
+ if (params->max_search_depth == 0)
+ params->max_search_depth = 32;
+
+ /* Scale max_search_depth down since this algorithm finds the longest
+ * matches first. */
+ params->max_search_depth = DIV_ROUND_UP(params->max_search_depth, 5);
+}
+
+static u64
+lz_lsa_get_needed_memory(u32 max_window_size)
+{
+ u64 size = 0;
+
+ /* SA */
+ size += (u64)max_window_size * sizeof(u32);
+
+ /* ISA */
+ size += (u64)max_window_size * sizeof(u32);
+
+ /* salink and minimum temporary space for divsufsort */
+ size += max(BUILD_SA_MIN_TMP_LEN * sizeof(u32),
+ (u64)max_window_size * sizeof(struct salink));
+
+ return size;
+}
+
+static bool
+lz_lsa_params_valid(const struct lz_mf_params *params)
+{
+ return true;
+}
+
+static bool
+lz_lsa_init(struct lz_mf *_mf)
+{
+ struct lz_lsa *mf = (struct lz_lsa *)_mf;
+ const u32 max_window_size = mf->base.params.max_window_size;
+
+ lz_lsa_set_default_params(&mf->base.params);
+
+ /* SA and ISA will share the same allocation. */
+ mf->SA = MALLOC(max_window_size * 2 * sizeof(u32));
+ if (!mf->SA)
+ return false;
+
+ mf->salink = MALLOC(max(BUILD_SA_MIN_TMP_LEN * sizeof(u32),
+ max_window_size * sizeof(struct salink)));
+ if (!mf->salink) {
+ FREE(mf->SA);
+ return false;
+ }
+
+ return true;
+}
+
+static void
+lz_lsa_load_window(struct lz_mf *_mf, const u8 T[], u32 n)
+{
+ struct lz_lsa *mf = (struct lz_lsa *)_mf;
+ u32 *ISA, *LCP;
+
+ build_SA(mf->SA, T, n, (u32 *)mf->salink);
+
+ /* Compute ISA (Inverse Suffix Array) in a preliminary position.
+ *
+ * This is just a trick to save memory. Since LCP is unneeded after
+ * this function, it can be computed in any available space. The
+ * storage for the ISA is the best choice because the ISA can be built
+ * quickly in salink for now, then re-built in its real location at the
+ * end. This is probably worth it because computing the ISA from the SA
+ * is very fast, and since this match-finder is memory-hungry we'd like
+ * to save as much memory as possible. */
+ BUILD_BUG_ON(sizeof(mf->salink[0]) < sizeof(mf->ISA[0]));
+ ISA = (u32 *)mf->salink;
+ build_ISA(ISA, mf->SA, n);
+
+ /* Compute LCP (Longest Common Prefix) array. */
+ LCP = mf->SA + n;
+ build_LCP(LCP, mf->SA, ISA, T, n);
+
+ /* Initialize suffix array links. */
+ init_salink(mf->salink, LCP, mf->SA, T, n,
+ mf->base.params.min_match_len,
+ mf->base.params.max_match_len);
+ verify_salink(mf->salink, mf->SA, T, n,
+ mf->base.params.min_match_len,
+ mf->base.params.max_match_len);
+
+ /* Compute ISA (Inverse Suffix Array) in its final position. */
+ ISA = mf->SA + n;
+ build_ISA(ISA, mf->SA, n);
+
+ /* Save new variables and return. */
+ mf->ISA = ISA;
+}
+
+static u32
+lz_lsa_get_matches(struct lz_mf *_mf, struct lz_match matches[])
+{
+ struct lz_lsa *mf = (struct lz_lsa *)_mf;
+ const u32 i = mf->base.cur_window_pos++;
+
+ const u32 * const restrict SA = mf->SA;
+ const u32 * const restrict ISA = mf->ISA;
+ struct salink * const restrict link = mf->salink;
+
+ /* r = Rank of the suffix at the current position. */
+ const u32 r = ISA[i];
+
+ /* Prepare for searching the current position. */
+ lz_lsa_update_salink(r, link);
+
+ /* Prefetch next position in SA and link.
+ *
+ * This can improve performance on large windows since the locations in
+ * SA and link at which each successive search begins are in general
+ * randomly distributed. */
+ if (likely(i + 1 < mf->base.cur_window_size)) {
+ const u32 next_r = ISA[i + 1];
+ prefetch(&SA[next_r]);
+ prefetch(&link[next_r]);
+ }
+
+ /* L = rank of next suffix to the left;
+ * R = rank of next suffix to the right;
+ * lenL = length of match between current position and the suffix with rank L;
+ * lenR = length of match between current position and the suffix with rank R.
+ *
+ * This is left and right relative to the rank of the current suffix.
+ * Since the suffixes in the suffix array are sorted, the longest
+ * matches are immediately to the left and right (using the linked list
+ * to ignore all suffixes that occur later in the window). The match
+ * length decreases the farther left and right we go. We shall keep the
+ * length on both sides in sync in order to choose the lowest-cost match
+ * of each length.
+ */
+ u32 L = r - link[r].dist_to_prev;
+ u32 R = r + link[r].dist_to_next;
+ u32 lenL = link[r].lcpprev;
+ u32 lenR = link[r].lcpnext;
+
+ /* num_matches = number of matches found so far. */
+ u32 num_matches = 0;
+
+ /* best_offset = offset of lowest-cost match found so far.
+ *
+ * Shorter matches that do not have a lower offset than this are
+ * discarded, since presumably it would be cheaper to output the bytes
+ * from the longer match instead. */
+ u32 best_offset = LZ_LSA_POS_MAX;
+
+ /* count_remaining = maximum number of possible matches remaining to be
+ * considered. */
+ u32 count_remaining = mf->base.params.max_search_depth;
+
+ /* pending_offset = offset of lowest-cost match found for the current
+ * length, or 0 if none found yet. */
+ u32 pending_offset = 0;
+
+ /* Note: some 'goto' statements are used in the remainder of this
+ * function to remove unnecessary checks and create branches that the
+ * CPU may predict better. (This function is performance critical.) */
+
+ if (lenL != 0 && lenL >= lenR)
+ goto extend_left;
+ else if (lenR != 0)
+ goto extend_right;
+ else
+ return 0;
+
+extend_left:
+ /* Search suffixes on the left until the match length has decreased
+ * below the next match length on the right or to below the minimum
+ * match length. */
+ for (;;) {
+ u32 offset;
+ u32 old_L;
+ u32 old_lenL;
+
+ /* Check for hard cutoff on amount of work done. */
+ if (count_remaining-- == 0) {
+ if (pending_offset != 0) {
+ /* Save pending match. */
+ matches[num_matches++] = (struct lz_match) {
+ .len = lenL,
+ .offset = pending_offset,
+ };
+ }
+ goto out;
+ }
+
+ if (SA[L] < i) {
+ /* Suffix is in LZ77 dictionary. (Check was needed
+ * because the salink array caps distances to save
+ * memory.) */
+
+ offset = i - SA[L];
+
+ /* Save match offset if it results in lower cost. */
+ if (offset < best_offset) {
+ best_offset = offset;
+ pending_offset = offset;
+ }
+ }
+
+ /* Advance left to previous suffix. */
+
+ old_L = L;
+ old_lenL = lenL;
+
+ L -= link[L].dist_to_prev;
+
+ if (link[old_L].lcpprev < old_lenL) {
+ /* Match length decreased. */
+
+ lenL = link[old_L].lcpprev;
+
+ if (old_lenL > lenR) {
+ /* Neither the right side nor the left size has
+ * any more matches of length @old_lenL. If a
+ * pending match exists, save it. */
+ if (pending_offset != 0) {
+ matches[num_matches++] = (struct lz_match) {
+ .len = old_lenL,
+ .offset = pending_offset,
+ };
+ pending_offset = 0;
+ }
+
+ if (lenL >= lenR) {
+ /* New match length on left is still at
+ * least as large as the next match
+ * length on the right: Keep extending
+ * left, unless the minimum match length
+ * would be underrun. */
+ if (lenL == 0)
+ goto out;
+ goto extend_left;
+ }
+ }
+
+ /* Here we have lenL < lenR. Extend right.
+ * (No check for whether the minimum match length has
+ * been underrun is needed, provided that such lengths
+ * are marked as 0.) */
+ goto extend_right;
+ }
+ }
+
+extend_right:
+ /* Search suffixes on the right until the match length has decreased to
+ * the next match length on the left or to below the minimum match
+ * length. */
+ for (;;) {
+ u32 offset;
+ u32 old_R;
+ u32 old_lenR;
+
+ /* Check for hard cutoff on amount of work done. */
+ if (count_remaining-- == 0) {
+ if (pending_offset != 0) {
+ /* Save pending match. */
+ matches[num_matches++] = (struct lz_match) {
+ .len = lenR,
+ .offset = pending_offset,
+ };
+ }
+ goto out;
+ }
+
+ if (SA[R] < i) {
+ /* Suffix is in LZ77 dictionary. (Check was needed
+ * because the salink array caps distances to save
+ * memory.) */
+
+ offset = i - SA[R];
+
+ if (offset < best_offset) {
+ best_offset = offset;
+ pending_offset = offset;
+ }
+ }
+
+ /* Advance right to next suffix. */
+
+ old_R = R;
+ old_lenR = lenR;
+
+ R += link[R].dist_to_next;
+
+ if (link[old_R].lcpnext < lenR) {
+ /* Match length decreased. */
+
+ lenR = link[old_R].lcpnext;
+
+ /* Neither the right side nor the left size has any more
+ * matches of length @old_lenR. If a pending match
+ * exists, save it. */
+ if (pending_offset != 0) {
+ matches[num_matches++] = (struct lz_match) {
+ .len = old_lenR,
+ .offset = pending_offset,
+ };
+ pending_offset = 0;
+ }
+
+ if (lenL >= lenR) {
+ /* lenL >= lenR: Extend left, unless the
+ * minimum match length would be underrun, in
+ * which case we are done. */
+ if (lenL == 0)
+ goto out;
+
+ goto extend_left;
+ }
+ /* lenR > lenL: Keep extending right.
+ * (No check for whether the minimum match length has
+ * been underrun is needed, provided that such lengths
+ * are marked as 0.) */
+ }
+ }
+
+out:
+ for (u32 i = 0; i < num_matches / 2; i++)
+ swap(matches[i], matches[num_matches - 1 - i]);
+ return num_matches;
+}
+
+static void
+lz_lsa_skip_positions(struct lz_mf *_mf, u32 n)
+{
+ struct lz_lsa *mf = (struct lz_lsa *)_mf;
+ do {
+ lz_lsa_update_salink(mf->ISA[mf->base.cur_window_pos++], mf->salink);
+ } while (--n);
+}
+
+static void
+lz_lsa_destroy(struct lz_mf *_mf)
+{
+ struct lz_lsa *mf = (struct lz_lsa *)_mf;
+
+ FREE(mf->SA);
+ FREE(mf->salink);
+}
+
+const struct lz_mf_ops lz_linked_suffix_array_ops = {
+ .params_valid = lz_lsa_params_valid,
+ .get_needed_memory = lz_lsa_get_needed_memory,
+ .init = lz_lsa_init,
+ .load_window = lz_lsa_load_window,
+ .get_matches = lz_lsa_get_matches,
+ .skip_positions = lz_lsa_skip_positions,
+ .destroy = lz_lsa_destroy,
+ .struct_size = sizeof(struct lz_lsa),
+};