From: Eric Biggers Date: Mon, 29 Dec 2014 02:03:09 +0000 (-0600) Subject: Cleanups from recent changes X-Git-Tag: v1.7.4~7 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=d1284a3b721162794ebd7131d090ab7c0cba92a3 Cleanups from recent changes --- diff --git a/NEWS b/NEWS index b5f7be59..e73e9355 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,6 @@ Version 1.7.4-BETA: - The Windows binary distribution no longer contain third party DLLs. - These dependencies are instead compiled directly into libwim.dll. + The Windows binary distribution no longer contains third party DLLs. + These dependencies are instead compiled directly into the libwim DLL. Added more fixes for wimlib on non-x86 architectures such as ARM. diff --git a/src/lzms_common.c b/src/lzms_common.c index fccc5667..117e2f32 100644 --- a/src/lzms_common.c +++ b/src/lzms_common.c @@ -1,5 +1,5 @@ /* - * lzms-common.c - Common code for LZMS compression and decompression + * lzms_common.c - Common code for LZMS compression and decompression */ /* diff --git a/src/lzms_decompress.c b/src/lzms_decompress.c index 3be7cd13..8cb096f2 100644 --- a/src/lzms_decompress.c +++ b/src/lzms_decompress.c @@ -793,7 +793,6 @@ lzms_decode_items(struct lzms_decompressor * const restrict d, d->recent_delta_offsets[2] = d->recent_delta_offsets[1]; d->recent_delta_offsets[1] = d->recent_delta_offsets[0]; d->recent_delta_offsets[0] = d->pending_delta_offset; - d->pending_delta_offset = 0; } d->pending_delta_offset = raw_offset | ((u64)power << 32); diff --git a/src/lzx_common.c b/src/lzx_common.c index 76c73bae..bd2e097f 100644 --- a/src/lzx_common.c +++ b/src/lzx_common.c @@ -1,5 +1,5 @@ /* - * lzx-common.c - Common code for LZX compression and decompression. + * lzx_common.c - Common code for LZX compression and decompression. */ /* @@ -35,6 +35,10 @@ # include <emmintrin.h> #endif +#ifdef __AVX2__ +# include <immintrin.h> +#endif + /* Mapping: offset slot => first match offset that uses that offset slot.
*/ const u32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS] = { @@ -266,7 +270,17 @@ lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32)) for (;;) { u32 e8_mask; u8 *orig_p = p; - #ifdef __SSE2__ + #ifdef __AVX2__ + const __m256i e8_bytes = _mm256_set1_epi8(0xE8); + for (;;) { + __m256i bytes = *(const __m256i *)p; + __m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes); + e8_mask = _mm256_movemask_epi8(cmpresult); + if (e8_mask) + break; + p += 32; + } + #else const __m128i e8_bytes = _mm_set1_epi8(0xE8); for (;;) { /* Read the next 32 bytes of data and test them @@ -286,17 +300,6 @@ lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32)) } p += 32; } - #else - /* AVX-2 */ - const __m256i e8_bytes = _mm256_set1_epi8(0xE8); - for (;;) { - __m256i bytes = *(const __m256i *)p; - __m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes); - e8_mask = _mm256_movemask_epi8(cmpresult); - if (e8_mask) - break; - p += 32; - } #endif /* Did we pass over data with no E8 bytes? */