* prefix (LCP) between the suffix having this rank and
* the suffix with the the smallest larger rank that
* starts earlier in the window than the suffix having
- * this rank.
+ * this rank. If no such suffix exists, this will be 0.
*
* Later, during match-finding, after the corresponding
* suffix has entered the LZ77 dictionary, this value
* common prefix (LCP) between the suffix having this
* rank and the suffix with the the largest smaller rank
* that starts earlier in the window than the suffix
- * having this rank.
+ * having this rank. If no such suffix exists, this
+ * will be 0.
*
* Later, during match-finding, after the corresponding
* suffix has entered the LZ77 dictionary, this value
/* Distance to the suffix referred to in the description
* of "lcpnext" above, but capped to a maximum value to
- * save memory. If the true distance was truncated,
- * this will give the distance to the rank of a suffix
- * that is lexicographically closer to the current
- * suffix than the desired suffix, but appears *later*
- * in the window and hence cannot be used as the basis
- * for a LZ77 match. */
+ * save memory; or, 0 if no such suffix exists. If the
+ * true distance was truncated, this will give the
+ * distance to the rank of a suffix that is
+ * lexicographically closer to the current suffix than
+ * the desired suffix, but appears *later* in the window
+ * and hence cannot be used as the basis for a LZ77
+ * match. */
lz_sarray_delta_t dist_to_next;
/* Distance to the suffix referred to in the description
* of "lcpprev" above, but capped to a maximum value to
- * save memory. If the true distance was truncated,
- * this will give the distance to the rank of a suffix
- * that is lexicographically closer to the current
- * suffix than the desired suffix, but appears *later*
- * in the window and hence cannot be used as the basis
- * for a LZ77 match. */
+ * save memory; or, 0 if no such suffix exists. If the
+ * true distance was truncated, this will give the
+ * distance to the rank of a suffix that is
+ * lexicographically closer to the current suffix than
+ * the desired suffix, but appears *later* in the window
+ * and hence cannot be used as the basis for a LZ77
+ * match. */
lz_sarray_delta_t dist_to_prev;
};
};
u32 count_remaining = max_matches_to_consider;
/* pending_offset = offset of lowest-cost match found for the current
- * length, or 0 if no match has been found yet. */
+ * length, or 0 if none found yet. */
lz_sarray_pos_t pending_offset = 0;
/* Note: some 'goto' statements are used in the remainder of this
}
}
- /* Advance left to next suffix. */
+ /* Advance left to previous suffix. */
old_L = L;
old_lenL = lenL;
pending_offset = 0;
}
- if (lenR > lenL) {
- /* lenR > lenL: Keep extending right, unless
- * the minimum match length would be underrun.
- */
- if (lenR == 0)
- return nmatches;
- } else {
+ if (lenL >= lenR) {
/* lenL >= lenR: Extend left, unless the
* minimum match length would be underrun, in
* which case we are done. */
goto extend_left;
}
+ /* lenR > lenL: Keep extending right.
+ * (No check for whether the minimum match length has
+ * been underrun is needed, provided that such lengths
+ * are marked as 0.) */
}
}
}
#include "divsufsort/divsufsort.h"
#include <string.h>
-#define DIVSUFSORT_TMP1_SIZE (256 * sizeof(saidx_t))
-#define DIVSUFSORT_TMP2_SIZE (256 * 256 * sizeof(saidx_t))
+#define DIVSUFSORT_TMP1_SIZE (256 * sizeof(saidx_t)) /* bucket_A */
+#define DIVSUFSORT_TMP2_SIZE (256 * 256 * sizeof(saidx_t)) /* bucket_B */
/* If ENABLE_LZ_DEBUG is defined, verify that the suffix array satisfies its
* definition.
* Pass 1 calculates, for each suffix rank, the corresponding
* "next_initial" value which is the smallest larger rank that
* corresponds to a suffix starting earlier in the string. It also
- * calculates "lcpnext_initial", which is the number of bytes shared
- * with that suffix, although to eliminate checks in
- * lz_sarray_get_matches(), "lcpnext_initial" is set to 0 if it's less
- * than the minimum match length or set to the maximum match length if
- * it's greater than the maximum match length.
+ * calculates "lcpnext_initial", which is the longest common prefix with
+ * that suffix, although to eliminate checks in lz_sarray_get_matches(),
+ * "lcpnext_initial" is set to 0 if it's less than the minimum match
+ * length or set to the maximum match length if it's greater than the
+ * maximum match length.
*
* Pass 2 translates each absolute "next_initial", a 4-byte value, into
* a relative "dist_to_next", a 1-byte value. This is done to save
- * memory. In the case that the true relative distance cannot be
+ * memory. In the case that the exact relative distance cannot be
* encoded in 1 byte, it is capped to 255. This is valid as long as
* lz_sarray_get_matches() validates each position before using it.
* Note that "lcpnext" need not be updated in this case because it will
* earlier in the string.
*
* To save memory we don't have a "prev_initial" field, but rather store
- * those values in the LCP array.
- */
+ * those values in the LCP array. */
LCP[0] = LZ_SARRAY_POS_MAX;
link[0].lcpprev = 0;
for (lz_sarray_pos_t r = 1; r < n; r++) {