2 * lz_extend.h - fast match extension for Lempel-Ziv matchfinding
4 * The following copying information applies to this specific source code file:
6 * Written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
8 * To the extent possible under law, the author(s) have dedicated all copyright
9 * and related and neighboring rights to this software to the public domain
10 * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
11 * Dedication (the "CC0").
13 * This software is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
17 * You should have received a copy of the CC0 along with this software; if not
18 * see <http://creativecommons.org/publicdomain/zero/1.0/>.
21 #ifndef _WIMLIB_LZ_EXTEND_H
22 #define _WIMLIB_LZ_EXTEND_H
24 #include "wimlib/assert.h"
25 #include "wimlib/bitops.h"
26 #include "wimlib/unaligned.h"
28 #include <smmintrin.h>
30 /* Return the number of bytes at @matchptr that match the bytes at @strptr, up
31 * to a maximum of @max_len. Initially, @start_len bytes are matched. */
/*
 * lz_extend() - extend a match between the bytes at @strptr and @matchptr.
 *
 * @strptr:    start of the string being matched.
 * @matchptr:  start of the candidate match.
 * @start_len: number of bytes treated as matched on entry.
 * @max_len:   upper bound on the returned length; both return statements
 *             below clamp their result to it with min().
 *
 * Two strategies are visible in this function: a word-at-a-time XOR
 * comparison, and an x86 inline-assembly sequence built around the SSE4.2
 * PCMPESTRI instruction that compares 16 bytes per step.
 *
 * NOTE(review): the declaration of `len`, the surrounding loop/branch
 * structure, and the asm labels/jumps are not visible here; the comments
 * below describe only the statements shown.
 */
32 static inline machine_word_t
33 lz_extend(const u8 * const strptr, const u8 * const matchptr,
34 const machine_word_t start_len, const machine_word_t max_len)
/* Holds the XOR of one word from each buffer; zero means the words are equal. */
38 machine_word_t v_word;
/* XOR the words at offset `len`: any set bit marks a differing bit position. */
41 v_word = load_word_unaligned(&matchptr[len]) ^
42 load_word_unaligned(&strptr[len]);
/* True once a difference was found or `len` has reached the limit. */
43 if (v_word != 0 || len >= max_len)
/*
 * `>> 3` turns a bit index into a byte count (8 bits per byte).
 * NOTE(review): presumably ffsw() returns the index of the first set bit of
 * v_word, i.e. the first differing bit -- confirm in wimlib/bitops.h.
 * min() keeps the result from exceeding max_len.
 */
48 return min(max_len, len + (ffsw(v_word) >> 3));
/* Load 16 bytes (unaligned) from strptr + len into xmm0. */
55 " movdqu 0x0(%[strptr],%[len],1), %%xmm0 \n"
/*
 * Compare xmm0 with the 16 bytes at matchptr + len.  Immediate 0x18 selects
 * unsigned bytes, the "equal each" aggregation, negative polarity and
 * least-significant index, so ECX receives the index of the first byte that
 * differs (16 if none differ).  The explicit lengths come from EAX and EDX.
 */
56 " pcmpestri $0x18, 0x0(%[matchptr],%[len],1), %%xmm0 \n"
/* Advance by the 16 bytes just compared. */
58 " add $0x10, %[len] \n"
/*
 * NOTE(review): 257 is a hard-coded bound that does not depend on @max_len;
 * it looks like a format-specific limit -- confirm with the callers.
 */
60 " cmp $257, %[len] \n"
/* Zero RCX so that the add below contributes nothing on this path. */
62 " xor %%rcx, %%rcx \n"
/* Add the byte index held in RCX to the running length. */
64 " add %%rcx, %[len] \n"
/*
 * Inputs: EAX = 16 and EDX = 16 are the explicit string lengths consumed by
 * PCMPESTRI; strptr and matchptr are passed in general-purpose registers.
 * NOTE(review): the output operand list (expected to bind `len`) is not
 * visible here.
 */
66 : "a" (16), "d" (16), [strptr] "r" (strptr), [matchptr] "r" (matchptr)
/*
 * Clobbers: RCX (written by PCMPESTRI and the xor), the flags, xmm0, and
 * "memory" because the asm reads through the two pointers.
 */
67 : "rcx", "cc", "xmm0", "memory"
/* Clamp to @max_len: `len` advances 16 bytes at a time above. */
71 return min(len, max_len);
75 #endif /* _WIMLIB_LZ_EXTEND_H */