From: Eric Biggers Date: Fri, 10 Jan 2014 16:40:43 +0000 (-0600) Subject: lz_sarray: Fix some comments X-Git-Tag: v1.6.1~73 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=5c0846b71891c0c088de4f9ef4cc6ab545f5b64e lz_sarray: Fix some comments --- diff --git a/include/wimlib/lz_sarray.h b/include/wimlib/lz_sarray.h index 6f878dd2..0c7007b7 100644 --- a/include/wimlib/lz_sarray.h +++ b/include/wimlib/lz_sarray.h @@ -137,7 +137,7 @@ struct salink { * prefix (LCP) between the suffix having this rank and * the suffix with the the smallest larger rank that * starts earlier in the window than the suffix having - * this rank. + * this rank. If no such suffix exists, this will be 0. * * Later, during match-finding, after the corresponding * suffix has entered the LZ77 dictionary, this value @@ -151,7 +151,8 @@ struct salink { * common prefix (LCP) between the suffix having this * rank and the suffix with the the largest smaller rank * that starts earlier in the window than the suffix - * having this rank. + * having this rank. If no such suffix exists, this + * will be 0. * * Later, during match-finding, after the corresponding * suffix has entered the LZ77 dictionary, this value @@ -163,22 +164,24 @@ struct salink { /* Distance to the suffix referred to in the description * of "lcpnext" above, but capped to a maximum value to - * save memory. If the true distance was truncated, - * this will give the distance to the rank of a suffix - * that is lexicographically closer to the current - * suffix than the desired suffix, but appears *later* - * in the window and hence cannot be used as the basis - * for a LZ77 match. */ + * save memory; or, 0 if no such suffix exists. If the + * true distance was truncated, this will give the + * distance to the rank of a suffix that is + * lexicographically closer to the current suffix than + * the desired suffix, but appears *later* in the window + * and hence cannot be used as the basis for a LZ77 + * match. */ lz_sarray_delta_t dist_to_next; /* Distance to the suffix referred to in the description * of "lcpprev" above, but capped to a maximum value to - * save memory. If the true distance was truncated, - * this will give the distance to the rank of a suffix - * that is lexicographically closer to the current - * suffix than the desired suffix, but appears *later* - * in the window and hence cannot be used as the basis - * for a LZ77 match. */ + * save memory; or, 0 if no such suffix exists. If the + * true distance was truncated, this will give the + * distance to the rank of a suffix that is + * lexicographically closer to the current suffix than + * the desired suffix, but appears *later* in the window + * and hence cannot be used as the basis for a LZ77 + * match. */ lz_sarray_delta_t dist_to_prev; }; }; @@ -315,7 +318,7 @@ lz_sarray_get_matches(struct lz_sarray *mf, u32 count_remaining = max_matches_to_consider; /* pending_offset = offset of lowest-cost match found for the current - * length, or 0 if no match has been found yet. */ + * length, or 0 if none found yet. */ lz_sarray_pos_t pending_offset = 0; /* Note: some 'goto' statements are used in the remainder of this @@ -367,7 +370,7 @@ extend_left: } } - /* Advance left to next suffix. */ + /* Advance left to previous suffix. */ old_L = L; old_lenL = lenL; @@ -479,13 +482,7 @@ extend_right: pending_offset = 0; } - if (lenR > lenL) { - /* lenR > lenL: Keep extending right, unless - * the minimum match length would be underrun. - */ - if (lenR == 0) - return nmatches; - } else { + if (lenL >= lenR) { /* lenL >= lenR: Extend left, unless the * minimum match length would be underrun, in * which case we are done. */ @@ -494,6 +491,10 @@ extend_right: goto extend_left; } + /* lenR > lenL: Keep extending right. + * (No check for whether the minimum match length has + * been underrun is needed, provided that such lengths + * are marked as 0.) */ } } } diff --git a/src/lz_sarray.c b/src/lz_sarray.c index 33acd612..3d8484e4 100644 --- a/src/lz_sarray.c +++ b/src/lz_sarray.c @@ -40,8 +40,8 @@ #include "divsufsort/divsufsort.h" #include -#define DIVSUFSORT_TMP1_SIZE (256 * sizeof(saidx_t)) -#define DIVSUFSORT_TMP2_SIZE (256 * 256 * sizeof(saidx_t)) +#define DIVSUFSORT_TMP1_SIZE (256 * sizeof(saidx_t)) /* bucket_A */ +#define DIVSUFSORT_TMP2_SIZE (256 * 256 * sizeof(saidx_t)) /* bucket_B */ /* If ENABLE_LZ_DEBUG is defined, verify that the suffix array satisfies its * definition. @@ -201,15 +201,15 @@ init_salink(struct salink link[restrict], * Pass 1 calculates, for each suffix rank, the corresponding * "next_initial" value which is the smallest larger rank that * corresponds to a suffix starting earlier in the string. It also - * calculates "lcpnext_initial", which is the number of bytes shared - * with that suffix, although to eliminate checks in - * lz_sarray_get_matches(), "lcpnext_initial" is set to 0 if it's less - * than the minimum match length or set to the maximum match length if - * it's greater than the maximum match length. + * calculates "lcpnext_initial", which is the longest common prefix with + * that suffix, although to eliminate checks in lz_sarray_get_matches(), + * "lcpnext_initial" is set to 0 if it's less than the minimum match + * length or set to the maximum match length if it's greater than the + * maximum match length. * * Pass 2 translates each absolute "next_initial", a 4-byte value, into * a relative "dist_to_next", a 1-byte value. This is done to save - * memory. In the case that the true relative distance cannot be + * memory. In the case that the exact relative distance cannot be * encoded in 1 byte, it is capped to 255. This is valid as long as * lz_sarray_get_matches() validates each position before using it. * Note that "lcpnext" need not be updated in this case because it will @@ -259,8 +259,7 @@ init_salink(struct salink link[restrict], * earlier in the string. * * To save memory we don't have a "prev_initial" field, but rather store - * those values in the LCP array. - */ + * those values in the LCP array. */ LCP[0] = LZ_SARRAY_POS_MAX; link[0].lcpprev = 0; for (lz_sarray_pos_t r = 1; r < n; r++) {