X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=include%2Fwimlib%2Fmatchfinder_avx2.h;h=bdf10d212515aaa1dbcc7a794c5bf1c7b5e5ff05;hp=fe98b636c9a0d27c5dd7dfab35786b34447be2cf;hb=3e8aa757aaa63297f0d54007adf46411778fb6a8;hpb=94f8de6d411d58a0eb2e472cc1b984f195eb2447 diff --git a/include/wimlib/matchfinder_avx2.h b/include/wimlib/matchfinder_avx2.h index fe98b636..bdf10d21 100644 --- a/include/wimlib/matchfinder_avx2.h +++ b/include/wimlib/matchfinder_avx2.h @@ -2,6 +2,12 @@ * matchfinder_avx2.h * * Matchfinding routines optimized for Intel AVX2 (Advanced Vector Extensions). + * + * Author: Eric Biggers + * Year: 2014, 2015 + * + * The author dedicates this file to the public domain. + * You can do whatever you want with this file. */ #include <immintrin.h> @@ -16,9 +22,9 @@ matchfinder_init_avx2(pos_t *data, size_t size) return false; if (sizeof(pos_t) == 2) - v = _mm256_set1_epi16(MATCHFINDER_INITVAL); + v = _mm256_set1_epi16((u16)MATCHFINDER_NULL); else if (sizeof(pos_t) == 4) - v = _mm256_set1_epi32(MATCHFINDER_INITVAL); + v = _mm256_set1_epi32((u32)MATCHFINDER_NULL); else return false; @@ -33,32 +39,3 @@ matchfinder_init_avx2(pos_t *data, size_t size) } while (--n); return true; } - -static inline bool -matchfinder_rebase_avx2(pos_t *data, size_t size) -{ - __m256i v, *p; - size_t n; - - if ((size % sizeof(__m256i) * 4 != 0)) - return false; - - if (sizeof(pos_t) == 2) - v = _mm256_set1_epi16((pos_t)-MATCHFINDER_WINDOW_SIZE); - else if (sizeof(pos_t) == 4) - v = _mm256_set1_epi32((pos_t)-MATCHFINDER_WINDOW_SIZE); - else - return false; - - p = (__m256i *)data; - n = size / (sizeof(__m256i) * 4); - do { - /* PADDSW: Add Packed Signed Integers With Signed Saturation */ - p[0] = _mm256_adds_epi16(p[0], v); - p[1] = _mm256_adds_epi16(p[1], v); - p[2] = _mm256_adds_epi16(p[2], v); - p[3] = _mm256_adds_epi16(p[3], v); - p += 4; - } while (--n); - return true; -}