From: Eric Biggers Date: Wed, 22 Jun 2016 01:01:57 +0000 (-0500) Subject: lz_extend: simplify lz_extend() slightly X-Git-Tag: v1.10.0~40 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=0ec3ead8ebd29703e12342c56b282ae37b188e6d;hp=76689b1cac26c545260568997ae7fb949846f302 lz_extend: simplify lz_extend() slightly Unrolling the first four word copies does not seem to give noticeably better performance anymore, and on a recent Intel processor actually appears to decrease the performance slightly. --- diff --git a/include/wimlib/lz_extend.h b/include/wimlib/lz_extend.h index 858cb9a3..2fb76bc9 100644 --- a/include/wimlib/lz_extend.h +++ b/include/wimlib/lz_extend.h @@ -3,7 +3,7 @@ * * The following copying information applies to this specific source code file: * - * Written in 2014-2015 by Eric Biggers + * Written in 2014-2016 by Eric Biggers * * To the extent possible under law, the author(s) have dedicated all copyright * and related and neighboring rights to this software to the public domain @@ -24,52 +24,30 @@ #include "wimlib/bitops.h" #include "wimlib/unaligned.h" -/* Return the number of bytes at @matchptr that match the bytes at @strptr, up - * to a maximum of @max_len. Initially, @start_len bytes are matched. */ +/* + * Return the number of bytes at @matchptr that match the bytes at @strptr, up + * to a maximum of @max_len. Initially, @len bytes are matched. 
+ */ static inline u32 lz_extend(const u8 * const strptr, const u8 * const matchptr, - const u32 start_len, const u32 max_len) + u32 len, const u32 max_len) { - u32 len = start_len; - machine_word_t v_word; - - if (UNALIGNED_ACCESS_IS_FAST) { - - if (likely(max_len - len >= 4 * WORDSIZE)) { - - #define COMPARE_WORD_STEP \ - v_word = load_word_unaligned(&matchptr[len]) ^ \ - load_word_unaligned(&strptr[len]); \ - if (v_word != 0) \ - goto word_differs; \ - len += WORDSIZE; \ - - COMPARE_WORD_STEP - COMPARE_WORD_STEP - COMPARE_WORD_STEP - COMPARE_WORD_STEP - #undef COMPARE_WORD_STEP - } - - while (len + WORDSIZE <= max_len) { - v_word = load_word_unaligned(&matchptr[len]) ^ - load_word_unaligned(&strptr[len]); - if (v_word != 0) - goto word_differs; - len += WORDSIZE; + while (UNALIGNED_ACCESS_IS_FAST && len + WORDSIZE <= max_len) { + machine_word_t v = load_word_unaligned(matchptr + len) ^ + load_word_unaligned(strptr + len); + if (v != 0) { + if (CPU_IS_LITTLE_ENDIAN) + len += ffsw(v) >> 3; + else + len += (8 * WORDSIZE - 1 - flsw(v)) >> 3; + return len; } + len += WORDSIZE; } while (len < max_len && matchptr[len] == strptr[len]) len++; return len; - -word_differs: - if (CPU_IS_LITTLE_ENDIAN) - len += (ffsw(v_word) >> 3); - else - len += (8 * WORDSIZE - 1 - flsw(v_word)) >> 3; - return len; } #endif /* _WIMLIB_LZ_EXTEND_H */