X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=include%2Fwimlib%2Flz_sarray.h;h=e609d0c2e79dbbcc93a668475ae0f4fa6a53d0dd;hb=2fdc3bd720f5bc49680dc2284ea42a537d1acc07;hp=6f878dd2bafd9d89b9d16ba80b88d70fa46e5217;hpb=31274ba5cbf4de73c8a68e7d17f490c3b0df6cff;p=wimlib diff --git a/include/wimlib/lz_sarray.h b/include/wimlib/lz_sarray.h index 6f878dd2..e609d0c2 100644 --- a/include/wimlib/lz_sarray.h +++ b/include/wimlib/lz_sarray.h @@ -34,7 +34,8 @@ #ifndef _WIMLIB_LZ_SARRAY_H #define _WIMLIB_LZ_SARRAY_H -#include "wimlib/compiler.h" /* must define '_always_inline_attribute' */ +#include "wimlib/compiler.h" /* must define '_always_inline_attribute', + 'likely()', and 'prefetch()'. */ #include "wimlib/lz.h" /* must define 'struct raw_match' and LZ_ASSERT() */ #include "wimlib/types.h" /* must define 'bool', 'u8', 'u16, and 'u32' */ @@ -135,9 +136,9 @@ struct salink { struct { /* Intially, the length, in bytes, of the longest common * prefix (LCP) between the suffix having this rank and - * the suffix with the the smallest larger rank that + * the suffix with the smallest larger rank that * starts earlier in the window than the suffix having - * this rank. + * this rank. If no such suffix exists, this will be 0. * * Later, during match-finding, after the corresponding * suffix has entered the LZ77 dictionary, this value @@ -149,9 +150,10 @@ struct salink { /* Initially, the length, in bytes, of the longest * common prefix (LCP) between the suffix having this - * rank and the suffix with the the largest smaller rank + * rank and the suffix with the largest smaller rank * that starts earlier in the window than the suffix - * having this rank. + * having this rank. If no such suffix exists, this + * will be 0. * * Later, during match-finding, after the corresponding * suffix has entered the LZ77 dictionary, this value @@ -163,22 +165,24 @@ struct salink { /* Distance to the suffix referred to in the description * of "lcpnext" above, but capped to a maximum value to - * save memory. If the true distance was truncated, - * this will give the distance to the rank of a suffix - * that is lexicographically closer to the current - * suffix than the desired suffix, but appears *later* - * in the window and hence cannot be used as the basis - * for a LZ77 match. */ + * save memory; or, 0 if no such suffix exists. If the + * true distance was truncated, this will give the + * distance to the rank of a suffix that is + * lexicographically closer to the current suffix than + * the desired suffix, but appears *later* in the window + * and hence cannot be used as the basis for an LZ77 + * match. */ lz_sarray_delta_t dist_to_next; /* Distance to the suffix referred to in the description * of "lcpprev" above, but capped to a maximum value to - * save memory. If the true distance was truncated, - * this will give the distance to the rank of a suffix - * that is lexicographically closer to the current - * suffix than the desired suffix, but appears *later* - * in the window and hence cannot be used as the basis - * for a LZ77 match. */ + * save memory; or, 0 if no such suffix exists. If the + * true distance was truncated, this will give the + * distance to the rank of a suffix that is + * lexicographically closer to the current suffix than + * the desired suffix, but appears *later* in the window + * and hence cannot be used as the basis for an LZ77 + * match. */ lz_sarray_delta_t dist_to_prev; }; }; @@ -282,6 +286,19 @@ lz_sarray_get_matches(struct lz_sarray *mf, /* Prepare for searching the current position. */ lz_sarray_update_salink(r, link); +#if 1 + /* Prefetch next position in SA and link. + * + * This can improve performance on large windows since the locations in + * SA and link at which each successive search begins are in general + * randomly distributed. */ + if (likely(i + 1 < mf->window_size)) { + const lz_sarray_pos_t next_r = ISA[i + 1]; + prefetch(&SA[next_r]); + prefetch(&link[next_r]); + } +#endif + /* L = rank of next suffix to the left; * R = rank of next suffix to the right; * lenL = length of match between current position and the suffix with rank L; @@ -315,7 +332,7 @@ lz_sarray_get_matches(struct lz_sarray *mf, u32 count_remaining = max_matches_to_consider; /* pending_offset = offset of lowest-cost match found for the current - * length, or 0 if no match has been found yet. */ + * length, or 0 if none found yet. */ lz_sarray_pos_t pending_offset = 0; /* Note: some 'goto' statements are used in the remainder of this @@ -367,7 +384,7 @@ extend_left: } } - /* Advance left to next suffix. */ + /* Advance left to previous suffix. */ old_L = L; old_lenL = lenL; @@ -406,10 +423,10 @@ extend_left: } } - /* Here we have lenL < lenR. Extend right, unless the - * minimum match length would be underrun. */ - if (lenR == 0) - return nmatches; + /* Here we have lenL < lenR. Extend right. + * (No check for whether the minimum match length has + * been underrun is needed, provided that such lengths + * are marked as 0.) */ goto extend_right; } } @@ -479,13 +496,7 @@ extend_right: pending_offset = 0; } - if (lenR > lenL) { - /* lenR > lenL: Keep extending right, unless - * the minimum match length would be underrun. - */ - if (lenR == 0) - return nmatches; - } else { + if (lenL >= lenR) { /* lenL >= lenR: Extend left, unless the * minimum match length would be underrun, in * which case we are done. */ @@ -494,6 +505,10 @@ extend_right: goto extend_left; } + /* lenR > lenL: Keep extending right. + * (No check for whether the minimum match length has + * been underrun is needed, provided that such lengths + * are marked as 0.) */ } } }