X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=include%2Fwimlib%2Flz_extend.h;fp=include%2Fwimlib%2Flz_extend.h;h=ad16847d9a769ab17175a45dcc4f352366f3e1c4;hb=e9ab3bf39bf6689a62a94fc45ccc2b3f78e183c8;hp=858cb9a3b1f791edccbb6f2663f71fc8a3eb3983;hpb=a582ba68e1919a72066ae7e4d6c0c36d6db66675;p=wimlib diff --git a/include/wimlib/lz_extend.h b/include/wimlib/lz_extend.h index 858cb9a3..ad16847d 100644 --- a/include/wimlib/lz_extend.h +++ b/include/wimlib/lz_extend.h @@ -24,52 +24,58 @@ #include "wimlib/bitops.h" #include "wimlib/unaligned.h" +#include + /* Return the number of bytes at @matchptr that match the bytes at @strptr, up * to a maximum of @max_len. Initially, @start_len bytes are matched. */ static inline u32 lz_extend(const u8 * const strptr, const u8 * const matchptr, const u32 start_len, const u32 max_len) { +#if 0 u32 len = start_len; machine_word_t v_word; - if (UNALIGNED_ACCESS_IS_FAST) { + for (;;) { + v_word = load_word_unaligned(&matchptr[len]) ^ + load_word_unaligned(&strptr[len]); + if (v_word != 0 || len >= max_len) + break; + len += WORDSIZE; + } + + return min(max_len, len + (ffsw(v_word) >> 3)); +#else - if (likely(max_len - len >= 4 * WORDSIZE)) { + const u8 *p1 = strptr + start_len; + const u8 *p2 = matchptr + start_len; - #define COMPARE_WORD_STEP \ - v_word = load_word_unaligned(&matchptr[len]) ^ \ - load_word_unaligned(&strptr[len]); \ - if (v_word != 0) \ - goto word_differs; \ - len += WORDSIZE; \ + u8 saved = strptr[max_len]; + ((u8 *)strptr)[max_len] = matchptr[max_len] + 1; - COMPARE_WORD_STEP - COMPARE_WORD_STEP - COMPARE_WORD_STEP - COMPARE_WORD_STEP - #undef COMPARE_WORD_STEP - } + __asm__( + " movdqu (%[p1]), %%xmm0 \n" + " pcmpestri $0x18, (%[p2]), %%xmm0 \n" + " jc 2f \n" + "1: \n" + " add $0x10, %[p1] \n" + " add $0x10, %[p2] \n" + " movdqu (%[p1]), %%xmm0 \n" + " pcmpestri $0x18, (%[p2]), %%xmm0 \n" + " jnc 1b \n" + "2: \n" + " add %%rcx, %[p1] \n" + " add %%rcx, %[p2] \n" + : [p1] "+r" (p1), [p2] "+r" (p2) + : "a" (16), "d" (16) + : "rcx", "cc", "xmm0", "memory" + ); - while (len + WORDSIZE <= max_len) { - v_word = load_word_unaligned(&matchptr[len]) ^ - load_word_unaligned(&strptr[len]); - if (v_word != 0) - goto word_differs; - len += WORDSIZE; - } - } - while (len < max_len && matchptr[len] == strptr[len]) - len++; - return len; + ((u8 *)strptr)[max_len] = saved; -word_differs: - if (CPU_IS_LITTLE_ENDIAN) - len += (ffsw(v_word) >> 3); - else - len += (8 * WORDSIZE - 1 - flsw(v_word)) >> 3; - return len; + return p1 - strptr; +#endif } #endif /* _WIMLIB_LZ_EXTEND_H */