X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=include%2Fwimlib%2Flz_extend.h;h=2fb76bc921d47c5b1986c087b1e12e24cd7b62ac;hp=7d7ed8b8e88b02ceb6ebdd684ed1dddd33f92a08;hb=0ec3ead8ebd29703e12342c56b282ae37b188e6d;hpb=7e3f3761e0a9fc93341ed0b9c69f6056d9a97af9 diff --git a/include/wimlib/lz_extend.h b/include/wimlib/lz_extend.h index 7d7ed8b8..2fb76bc9 100644 --- a/include/wimlib/lz_extend.h +++ b/include/wimlib/lz_extend.h @@ -1,73 +1,52 @@ /* - * lz_extend.h + * lz_extend.h - fast match extension for Lempel-Ziv matchfinding * - * Fast match extension for Lempel-Ziv matchfinding. + * The following copying information applies to this specific source code file: * - * The author dedicates this file to the public domain. - * You can do whatever you want with this file. + * Written in 2014-2016 by Eric Biggers + * + * To the extent possible under law, the author(s) have dedicated all copyright + * and related and neighboring rights to this software to the public domain + * worldwide via the Creative Commons Zero 1.0 Universal Public Domain + * Dedication (the "CC0"). + * + * This software is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the CC0 for more details. + * + * You should have received a copy of the CC0 along with this software; if not + * see <http://creativecommons.org/publicdomain/zero/1.0/>. */ #ifndef _WIMLIB_LZ_EXTEND_H #define _WIMLIB_LZ_EXTEND_H -#include "wimlib/types.h" - -#if (defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) -# define HAVE_FAST_LZ_EXTEND 1 -#else -# define HAVE_FAST_LZ_EXTEND 0 -#endif +#include "wimlib/bitops.h" +#include "wimlib/unaligned.h" -/* Return the number of bytes at @matchptr that match the bytes at @strptr, up - * to a maximum of @max_len. Initially, @start_len bytes are matched. */ +/* + * Return the number of bytes at @matchptr that match the bytes at @strptr, up + * to a maximum of @max_len. 
Initially, @len bytes are matched. + */ static inline u32 lz_extend(const u8 * const strptr, const u8 * const matchptr, - const u32 start_len, const u32 max_len) + u32 len, const u32 max_len) { - u32 len = start_len; - -#if HAVE_FAST_LZ_EXTEND - - while (len + sizeof(unsigned long) <= max_len) { - unsigned long x; - - x = *(const unsigned long *)&matchptr[len] ^ - *(const unsigned long *)&strptr[len]; - if (x != 0) - return len + (__builtin_ctzl(x) >> 3); - len += sizeof(unsigned long); - } - - if (sizeof(unsigned int) < sizeof(unsigned long) && - len + sizeof(unsigned int) <= max_len) - { - unsigned int x; - - x = *(const unsigned int *)&matchptr[len] ^ - *(const unsigned int *)&strptr[len]; - if (x != 0) - return len + (__builtin_ctz(x) >> 3); - len += sizeof(unsigned int); - } - - if (sizeof(unsigned int) == 4) { - if (len < max_len && matchptr[len] == strptr[len]) { - len++; - if (len < max_len && matchptr[len] == strptr[len]) { - len++; - if (len < max_len && matchptr[len] == strptr[len]) { - len++; - } - } + while (UNALIGNED_ACCESS_IS_FAST && len + WORDSIZE <= max_len) { + machine_word_t v = load_word_unaligned(matchptr + len) ^ + load_word_unaligned(strptr + len); + if (v != 0) { + if (CPU_IS_LITTLE_ENDIAN) + len += ffsw(v) >> 3; + else + len += (8 * WORDSIZE - 1 - flsw(v)) >> 3; + return len; } - return len; + len += WORDSIZE; } -#endif /* HAVE_FAST_LZ_EXTEND */ - while (len < max_len && matchptr[len] == strptr[len]) len++; - return len; }