*
* Fast match extension for Lempel-Ziv matchfinding.
*
+ * Author: Eric Biggers
+ * Year: 2014, 2015
+ *
* The author dedicates this file to the public domain.
* You can do whatever you want with this file.
*/
#ifndef _WIMLIB_LZ_EXTEND_H
#define _WIMLIB_LZ_EXTEND_H
-#include "wimlib/types.h"
-
-#if (defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__)
-# define HAVE_FAST_LZ_EXTEND 1
-#else
-# define HAVE_FAST_LZ_EXTEND 0
-#endif
+#include "wimlib/bitops.h"
+#include "wimlib/unaligned.h"
/* Return the number of bytes at @matchptr that match the bytes at @strptr, up
* to a maximum of @max_len. The first @start_len bytes are assumed to match. */
const u32 start_len, const u32 max_len)
{
u32 len = start_len;
+ machine_word_t v_word;
-#if HAVE_FAST_LZ_EXTEND
+ if (UNALIGNED_ACCESS_IS_FAST) {
- while (len + sizeof(unsigned long) <= max_len) {
- unsigned long x;
-
- x = *(const unsigned long *)&matchptr[len] ^
- *(const unsigned long *)&strptr[len];
- if (x != 0)
- return len + (__builtin_ctzl(x) >> 3);
- len += sizeof(unsigned long);
- }
+ if (likely(max_len - len >= 4 * WORDSIZE)) {
- if (sizeof(unsigned int) < sizeof(unsigned long) &&
- len + sizeof(unsigned int) <= max_len)
- {
- unsigned int x;
+ #define COMPARE_WORD_STEP \
+ v_word = load_word_unaligned(&matchptr[len]) ^ \
+ load_word_unaligned(&strptr[len]); \
+ if (v_word != 0) \
+ goto word_differs; \
+ len += WORDSIZE; \
- x = *(const unsigned int *)&matchptr[len] ^
- *(const unsigned int *)&strptr[len];
- if (x != 0)
- return len + (__builtin_ctz(x) >> 3);
- len += sizeof(unsigned int);
- }
+ COMPARE_WORD_STEP
+ COMPARE_WORD_STEP
+ COMPARE_WORD_STEP
+ COMPARE_WORD_STEP
+ #undef COMPARE_WORD_STEP
+ }
- if (sizeof(unsigned int) == 4) {
- if (len < max_len && matchptr[len] == strptr[len]) {
- len++;
- if (len < max_len && matchptr[len] == strptr[len]) {
- len++;
- if (len < max_len && matchptr[len] == strptr[len]) {
- len++;
- }
- }
+ while (len + WORDSIZE <= max_len) {
+ v_word = load_word_unaligned(&matchptr[len]) ^
+ load_word_unaligned(&strptr[len]);
+ if (v_word != 0)
+ goto word_differs;
+ len += WORDSIZE;
}
- return len;
}
-#endif /* HAVE_FAST_LZ_EXTEND */
-
while (len < max_len && matchptr[len] == strptr[len])
len++;
+ return len;
+word_differs:
+ if (CPU_IS_LITTLE_ENDIAN)
+ len += (ffsw(v_word) >> 3);
+ else
+ len += (flsw(v_word) >> 3);
return len;
}