X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzx_common.c;h=bd2e097fe7e26fd5832537ac8f6bcc708dcc990f;hp=76c73baea447aaae7d4bcfeb87c12cfe79ca7bd5;hb=d1284a3b721162794ebd7131d090ab7c0cba92a3;hpb=40a690416a3951361ec77d33a723dd4497fb7585 diff --git a/src/lzx_common.c b/src/lzx_common.c index 76c73bae..bd2e097f 100644 --- a/src/lzx_common.c +++ b/src/lzx_common.c @@ -1,5 +1,5 @@ /* - * lzx-common.c - Common code for LZX compression and decompression. + * lzx_common.c - Common code for LZX compression and decompression. */ /* @@ -35,6 +35,10 @@ # include <emmintrin.h> #endif +#ifdef __AVX2__ +# include <immintrin.h> +#endif + /* Mapping: offset slot => first match offset that uses that offset slot. */ const u32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS] = { @@ -266,7 +270,17 @@ lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32)) for (;;) { u32 e8_mask; u8 *orig_p = p; - #ifdef __SSE2__ + #ifdef __AVX2__ + const __m256i e8_bytes = _mm256_set1_epi8(0xE8); + for (;;) { + __m256i bytes = *(const __m256i *)p; + __m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes); + e8_mask = _mm256_movemask_epi8(cmpresult); + if (e8_mask) + break; + p += 32; + } + #else const __m128i e8_bytes = _mm_set1_epi8(0xE8); for (;;) { /* Read the next 32 bytes of data and test them @@ -286,17 +300,6 @@ lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32)) } p += 32; } - #else - /* AVX-2 */ - const __m256i e8_bytes = _mm256_set1_epi8(0xE8); - for (;;) { - __m256i bytes = *(const __m256i *)p; - __m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes); - e8_mask = _mm256_movemask_epi8(cmpresult); - if (e8_mask) - break; - p += 32; - } #endif /* Did we pass over data with no E8 bytes? */