- /* Compute salink.next and salink.lcpnext. */
- link[n - 1].next = ~(input_idx_t)0;
- link[n - 1].lcpnext = 0;
- for (input_idx_t r = n - 2; r != ~(input_idx_t)0; r--) {
- input_idx_t t = r + 1;
- input_idx_t l = LCP[t];
- while (t != ~(input_idx_t)0 && SA[t] > SA[r]) {
- l = min(l, link[t].lcpnext);
- t = link[t].next;
+ /* Calculate salink.dist_to_next and salink.lcpnext.
+ *
+ * Pass 1 calculates, for each suffix rank, the corresponding
+ * "next_initial" value which is the smallest larger rank that
+ * corresponds to a suffix starting earlier in the string. It also
+ * calculates "lcpnext_initial", which is the longest common prefix with
+ * that suffix, although to eliminate checks in lz_sarray_get_matches(),
+ * "lcpnext_initial" is set to 0 if it's less than the minimum match
+ * length or set to the maximum match length if it's greater than the
+ * maximum match length.
+ *
+ * Pass 2 translates each absolute "next_initial", a 4-byte value, into
+ * a relative "dist_to_next", a 1-byte value. This is done to save
+ * memory. In the case that the exact relative distance cannot be
+ * encoded in 1 byte, it is capped to 255. This is valid as long as
+ * lz_sarray_get_matches() validates each position before using it.
+ * Note that "lcpnext" need not be updated in this case because it will
+ * not be used until the actual next rank has been found anyway.
+ */
+ link[n - 1].next_initial = LZ_SARRAY_POS_MAX;
+ link[n - 1].lcpnext_initial = 0;
+ for (lz_sarray_pos_t r = n - 2; r != LZ_SARRAY_POS_MAX; r--) {
+ lz_sarray_pos_t t = r + 1;
+ lz_sarray_pos_t l = LCP[t];
+ while (t != LZ_SARRAY_POS_MAX && SA[t] > SA[r]) {
+ l = min(l, link[t].lcpnext_initial);
+ t = link[t].next_initial;