2 * lz_extend.h - fast match extension for Lempel-Ziv matchfinding
4 * The following copying information applies to this specific source code file:
6 * Written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
8 * To the extent possible under law, the author(s) have dedicated all copyright
9 * and related and neighboring rights to this software to the public domain
10 * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
11 * Dedication (the "CC0").
13 * This software is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
17 * You should have received a copy of the CC0 along with this software; if not
18 * see <http://creativecommons.org/publicdomain/zero/1.0/>.
21 #ifndef _WIMLIB_LZ_EXTEND_H
22 #define _WIMLIB_LZ_EXTEND_H
24 #include "wimlib/assert.h"
25 #include "wimlib/bitops.h"
26 #include "wimlib/unaligned.h"
28 #include <immintrin.h>
30 /* Return the number of bytes at @matchptr that match the bytes at @strptr, up
31 * to a maximum of @max_len. Initially, @start_len bytes are matched. */
/*
 * NOTE(review): in this listing every line carries a stray leading number and
 * several numbers are skipped (e.g. 35-37, 39-40, 44-47, 49-51), so the braces,
 * the declaration of `len`, the loop/branch structure, the asm statement opener
 * and its output operand list are not visible.  The comments below describe
 * only the statements that can be seen.  Two `return` statements are visible;
 * presumably they belong to two alternative implementations (a word-at-a-time
 * one and an AVX2 one) selected by conditionals that are not shown -- confirm
 * against the complete file.
 */
32 static inline machine_word_t
33 lz_extend(const u8 * const strptr, const u8 * const matchptr,
34 const machine_word_t start_len, const machine_word_t max_len)
/* Scratch word that receives the XOR of the two inputs at the current offset. */
38 machine_word_t v_word;
/*
 * Compare one machine word from each buffer at offset `len`.  The XOR is zero
 * exactly when the two words are identical, so any set bit marks a difference.
 */
41 v_word = load_word_unaligned(&matchptr[len]) ^
42 load_word_unaligned(&strptr[len]);
/*
 * True once a differing word has been found or `len` has reached @max_len.
 * The statement guarded by this condition is not visible in this listing.
 */
43 if (v_word != 0 || len >= max_len)
/*
 * `ffsw(v_word) >> 3` turns a bit position into a byte count (8 bits per
 * byte); presumably ffsw() yields the index of the lowest set bit, which would
 * make this the number of equal leading bytes on a little-endian machine --
 * TODO confirm against wimlib/bitops.h.  min() caps the result at @max_len.
 * NOTE(review): if this line can be reached through the `len >= max_len` arm
 * with v_word == 0, ffsw(0) is evaluated -- confirm that ffsw() tolerates a
 * zero argument or that control flow not shown here prevents it.
 */
48 return min(max_len, len + (ffsw(v_word) >> 3));
/*
 * Vector with every byte set to 0xFF.  It is passed to the asm below as the
 * [ones] operand, where XOR-ing with it inverts the byte-equality mask.
 */
52 const __m256i ones = _mm256_set1_epi8(0xFF);
/* Advance the offset by 0x20 = 32 bytes, the width of one ymm register. */
57 " add $0x20, %[len] \n"
/* Unaligned 32-byte loads from matchptr + len and strptr + len. */
59 " vmovdqu 0x0(%[matchptr],%[len],1), %%ymm0 \n"
60 " vmovdqu 0x0(%[strptr],%[len],1), %%ymm1 \n"
/* Per-byte compare: ymm1 gets 0xFF in each equal byte lane, 0x00 otherwise. */
61 " vpcmpeqb %%ymm0, %%ymm1, %%ymm1\n"
/* Invert the mask so that 0xFF now marks the byte lanes that differ. */
62 " vpxor %%ymm1, %[ones], %%ymm1\n"
/* Collect the top bit of each byte lane into ecx: bit i set <=> byte i differs. */
63 " vpmovmskb %%ymm1, %%ecx\n"
/* Disabled instruction left in the template (it is inside a C comment). */
67 /*" add %%cax, %[len] \n"*/
/*
 * Compares the offset with the literal 257 rather than with @max_len.
 * NOTE(review): presumably a format-specific maximum match length -- confirm,
 * and confirm which branch (not visible here) consumes the flags.
 */
68 " cmp $257, %[len] \n"
/*
 * Adds rcx to the offset.  NOTE(review): the only visible write to rcx is the
 * vpmovmskb above, which leaves a bitmask, not a byte count; an instruction
 * that converts the mask to the index of its first set bit (e.g. tzcnt/bsf)
 * may sit on one of the lines missing from this listing -- confirm.
 */
71 " add %%rcx, %[len] \n"
/*
 * Input operands: both pointers in general-purpose registers ("r") and the
 * all-ones vector in an SSE/AVX register ("x").  The output operand list
 * (which must bind [len]) is not visible in this listing.
 */
73 : [strptr] "r" (strptr), [matchptr] "r" (matchptr), [ones] "x" (ones)
/*
 * Clobbers: rcx and ymm0/ymm1 are used as scratch, "cc" because add/cmp
 * change the flags, and "memory" because the template reads through the
 * pointer operands.
 */
74 : "rcx", "cc", "ymm0", "ymm1", "memory"
/* The vector path can step past @max_len, so cap the result before returning. */
78 return min(len, max_len);
82 #endif /* _WIMLIB_LZ_EXTEND_H */