From: Eric Biggers Date: Fri, 31 Jan 2014 21:07:59 +0000 (-0600) Subject: lz_sarray: Prefetch portion of suffix array to be searched X-Git-Tag: v1.6.2~33 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=fc897414e8b4ba34794d3179428a56773f45e9a3 lz_sarray: Prefetch portion of suffix array to be searched Seems to improve performance of LZX and LZMS compression on large chunks by ~5%. Results may vary. --- diff --git a/include/wimlib/compiler.h b/include/wimlib/compiler.h index d32d092b..21fabd99 100644 --- a/include/wimlib/compiler.h +++ b/include/wimlib/compiler.h @@ -22,6 +22,7 @@ # define likely(x) __builtin_expect(!!(x), 1) # define unlikely(x) __builtin_expect(!!(x), 0) # define inline inline __attribute__((always_inline)) +# define prefetch(x) __builtin_prefetch(x) #else # define WIMLIBAPI # define _always_inline_attribute inline @@ -33,6 +34,7 @@ # define _aligned_attribute(size) # define likely(x) (x) # define unlikely(x) (x) +# define prefetch(x) #endif /* __GNUC__ */ #endif /* _WIMLIB_COMPILER_H */ diff --git a/include/wimlib/lz_sarray.h b/include/wimlib/lz_sarray.h index 5b63ae7c..30dc2b30 100644 --- a/include/wimlib/lz_sarray.h +++ b/include/wimlib/lz_sarray.h @@ -34,7 +34,8 @@ #ifndef _WIMLIB_LZ_SARRAY_H #define _WIMLIB_LZ_SARRAY_H -#include "wimlib/compiler.h" /* must define '_always_inline_attribute' */ +#include "wimlib/compiler.h" /* must define '_always_inline_attribute', + 'likely()', and 'prefetch()'. */ #include "wimlib/lz.h" /* must define 'struct raw_match' and LZ_ASSERT() */ #include "wimlib/types.h" /* must define 'bool', 'u8', 'u16, and 'u32' */ @@ -285,6 +286,19 @@ lz_sarray_get_matches(struct lz_sarray *mf, /* Prepare for searching the current position. */ lz_sarray_update_salink(r, link); +#if 1 + /* Prefetch next position in SA and link. + * + * This can improve performance on large windows since the locations in + * SA and link at which each successive search begins are in general + * randomly distributed. */ + if (likely(i + 1 < mf->window_size)) { + const lz_sarray_pos_t next_r = ISA[i + 1]; + prefetch(&SA[next_r]); + prefetch(&link[next_r]); + } +#endif + /* L = rank of next suffix to the left; * R = rank of next suffix to the right; * lenL = length of match between current position and the suffix with rank L;