From d1284a3b721162794ebd7131d090ab7c0cba92a3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 28 Dec 2014 20:03:09 -0600 Subject: [PATCH] Cleanups from recent changes --- NEWS | 4 ++-- src/lzms_common.c | 2 +- src/lzms_decompress.c | 1 - src/lzx_common.c | 29 ++++++++++++++++------------- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/NEWS b/NEWS index b5f7be59..e73e9355 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,6 @@ Version 1.7.4-BETA: - The Windows binary distribution no longer contain third party DLLs. - These dependencies are instead compiled directly into libwim.dll. + The Windows binary distribution no longer contains third party DLLs. + These dependencies are instead compiled directly into the libwim DLL. Added more fixes for wimlib on non-x86 architectures such as ARM. diff --git a/src/lzms_common.c b/src/lzms_common.c index fccc5667..117e2f32 100644 --- a/src/lzms_common.c +++ b/src/lzms_common.c @@ -1,5 +1,5 @@ /* - * lzms-common.c - Common code for LZMS compression and decompression + * lzms_common.c - Common code for LZMS compression and decompression */ /* diff --git a/src/lzms_decompress.c b/src/lzms_decompress.c index 3be7cd13..8cb096f2 100644 --- a/src/lzms_decompress.c +++ b/src/lzms_decompress.c @@ -793,7 +793,6 @@ lzms_decode_items(struct lzms_decompressor * const restrict d, d->recent_delta_offsets[2] = d->recent_delta_offsets[1]; d->recent_delta_offsets[1] = d->recent_delta_offsets[0]; d->recent_delta_offsets[0] = d->pending_delta_offset; - d->pending_delta_offset = 0; } d->pending_delta_offset = raw_offset | ((u64)power << 32); diff --git a/src/lzx_common.c b/src/lzx_common.c index 76c73bae..bd2e097f 100644 --- a/src/lzx_common.c +++ b/src/lzx_common.c @@ -1,5 +1,5 @@ /* - * lzx-common.c - Common code for LZX compression and decompression. + * lzx_common.c - Common code for LZX compression and decompression. 
*/ /* @@ -35,6 +35,10 @@ # include <emmintrin.h> #endif +#ifdef __AVX2__ +# include <immintrin.h> +#endif + /* Mapping: offset slot => first match offset that uses that offset slot. */ const u32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS] = { @@ -266,7 +270,17 @@ lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32)) for (;;) { u32 e8_mask; u8 *orig_p = p; - #ifdef __SSE2__ + #ifdef __AVX2__ + const __m256i e8_bytes = _mm256_set1_epi8(0xE8); + for (;;) { + __m256i bytes = *(const __m256i *)p; + __m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes); + e8_mask = _mm256_movemask_epi8(cmpresult); + if (e8_mask) + break; + p += 32; + } + #else const __m128i e8_bytes = _mm_set1_epi8(0xE8); for (;;) { /* Read the next 32 bytes of data and test them @@ -286,17 +300,6 @@ lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32)) } p += 32; } - #else - /* AVX-2 */ - const __m256i e8_bytes = _mm256_set1_epi8(0xE8); - for (;;) { - __m256i bytes = *(const __m256i *)p; - __m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes); - e8_mask = _mm256_movemask_epi8(cmpresult); - if (e8_mask) - break; - p += 32; - } #endif /* Did we pass over data with no E8 bytes? */ -- 2.43.0