X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=include%2Fwimlib%2Flz_extend.h;h=a3780e1d54a819245b980dc36439d0ddc7d2a3ab;hp=7d7ed8b8e88b02ceb6ebdd684ed1dddd33f92a08;hb=226a6dfe2909e054568298196785c944a1b5c4fa;hpb=7e3f3761e0a9fc93341ed0b9c69f6056d9a97af9 diff --git a/include/wimlib/lz_extend.h b/include/wimlib/lz_extend.h index 7d7ed8b8..a3780e1d 100644 --- a/include/wimlib/lz_extend.h +++ b/include/wimlib/lz_extend.h @@ -10,13 +10,8 @@ #ifndef _WIMLIB_LZ_EXTEND_H #define _WIMLIB_LZ_EXTEND_H -#include "wimlib/types.h" - -#if (defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) -# define HAVE_FAST_LZ_EXTEND 1 -#else -# define HAVE_FAST_LZ_EXTEND 0 -#endif +#include "wimlib/bitops.h" +#include "wimlib/unaligned.h" /* Return the number of bytes at @matchptr that match the bytes at @strptr, up * to a maximum of @max_len. Initially, @start_len bytes are matched. */ @@ -25,49 +20,44 @@ lz_extend(const u8 * const strptr, const u8 * const matchptr, const u32 start_len, const u32 max_len) { u32 len = start_len; + machine_word_t v_word; -#if HAVE_FAST_LZ_EXTEND - - while (len + sizeof(unsigned long) <= max_len) { - unsigned long x; + if (UNALIGNED_ACCESS_IS_FAST) { - x = *(const unsigned long *)&matchptr[len] ^ - *(const unsigned long *)&strptr[len]; - if (x != 0) - return len + (__builtin_ctzl(x) >> 3); - len += sizeof(unsigned long); - } + if (likely(max_len - len >= 4 * WORDSIZE)) { - if (sizeof(unsigned int) < sizeof(unsigned long) && - len + sizeof(unsigned int) <= max_len) - { - unsigned int x; + #define COMPARE_WORD_STEP \ + v_word = load_word_unaligned(&matchptr[len]) ^ \ + load_word_unaligned(&strptr[len]); \ + if (v_word != 0) \ + goto word_differs; \ + len += WORDSIZE; \ - x = *(const unsigned int *)&matchptr[len] ^ - *(const unsigned int *)&strptr[len]; - if (x != 0) - return len + (__builtin_ctz(x) >> 3); - len += sizeof(unsigned int); - } + COMPARE_WORD_STEP + COMPARE_WORD_STEP + COMPARE_WORD_STEP + COMPARE_WORD_STEP + #undef COMPARE_WORD_STEP + } - if (sizeof(unsigned int) == 4) { - if (len < max_len && matchptr[len] == strptr[len]) { - len++; - if (len < max_len && matchptr[len] == strptr[len]) { - len++; - if (len < max_len && matchptr[len] == strptr[len]) { - len++; - } - } + while (len + WORDSIZE <= max_len) { + v_word = load_word_unaligned(&matchptr[len]) ^ + load_word_unaligned(&strptr[len]); + if (v_word != 0) + goto word_differs; + len += WORDSIZE; } - return len; } -#endif /* HAVE_FAST_LZ_EXTEND */ - while (len < max_len && matchptr[len] == strptr[len]) len++; + return len; +word_differs: + if (CPU_IS_LITTLE_ENDIAN) + len += (ffsw(v_word) >> 3); + else + len += (flsw(v_word) >> 3); return len; }