wimlib.net Git - wimlib/blobdiff - include/wimlib/lz_extend.h
wimlib.h: document added behavior of WIMLIB_ADD_FLAG_WIMBOOT
[wimlib] / include / wimlib / lz_extend.h
index 7d7ed8b8e88b02ceb6ebdd684ed1dddd33f92a08..bd00183b973d980b5eab3f1aa2acf4b263e18569 100644 (file)
@@ -3,6 +3,9 @@
  *
  * Fast match extension for Lempel-Ziv matchfinding.
  *
+ * Author:     Eric Biggers
+ * Year:       2014, 2015
+ *
  * The author dedicates this file to the public domain.
  * You can do whatever you want with this file.
  */
 #ifndef _WIMLIB_LZ_EXTEND_H
 #define _WIMLIB_LZ_EXTEND_H
 
-#include "wimlib/types.h"
-
-#if (defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__)
-#  define HAVE_FAST_LZ_EXTEND 1
-#else
-#  define HAVE_FAST_LZ_EXTEND 0
-#endif
+#include "wimlib/bitops.h"
+#include "wimlib/unaligned.h"
 
 /* Return the number of bytes at @matchptr that match the bytes at @strptr, up
  * to a maximum of @max_len.  Initially, @start_len bytes are matched.  */
@@ -25,49 +23,44 @@ lz_extend(const u8 * const strptr, const u8 * const matchptr,
          const u32 start_len, const u32 max_len)
 {
        u32 len = start_len;
+       machine_word_t v_word;
 
-#if HAVE_FAST_LZ_EXTEND
+       if (UNALIGNED_ACCESS_IS_FAST) {
 
-       while (len + sizeof(unsigned long) <= max_len) {
-               unsigned long x;
-
-               x = *(const unsigned long *)&matchptr[len] ^
-                   *(const unsigned long *)&strptr[len];
-               if (x != 0)
-                       return len + (__builtin_ctzl(x) >> 3);
-               len += sizeof(unsigned long);
-       }
+               if (likely(max_len - len >= 4 * WORDSIZE)) {
 
-       if (sizeof(unsigned int) < sizeof(unsigned long) &&
-           len + sizeof(unsigned int) <= max_len)
-       {
-               unsigned int x;
+               #define COMPARE_WORD_STEP                                       \
+                       v_word = load_word_unaligned(&matchptr[len]) ^          \
+                                load_word_unaligned(&strptr[len]);             \
+                       if (v_word != 0)                                        \
+                               goto word_differs;                              \
+                       len += WORDSIZE;                                        \
 
-               x = *(const unsigned int *)&matchptr[len] ^
-                   *(const unsigned int *)&strptr[len];
-               if (x != 0)
-                       return len + (__builtin_ctz(x) >> 3);
-               len += sizeof(unsigned int);
-       }
+                       COMPARE_WORD_STEP
+                       COMPARE_WORD_STEP
+                       COMPARE_WORD_STEP
+                       COMPARE_WORD_STEP
+               #undef COMPARE_WORD_STEP
+               }
 
-       if (sizeof(unsigned int) == 4) {
-               if (len < max_len && matchptr[len] == strptr[len]) {
-                       len++;
-                       if (len < max_len && matchptr[len] == strptr[len]) {
-                               len++;
-                               if (len < max_len && matchptr[len] == strptr[len]) {
-                                       len++;
-                               }
-                       }
+               while (len + WORDSIZE <= max_len) {
+                       v_word = load_word_unaligned(&matchptr[len]) ^
+                                load_word_unaligned(&strptr[len]);
+                       if (v_word != 0)
+                               goto word_differs;
+                       len += WORDSIZE;
                }
-               return len;
        }
 
-#endif /* HAVE_FAST_LZ_EXTEND */
-
        while (len < max_len && matchptr[len] == strptr[len])
                len++;
+       return len;
 
+word_differs:
+       if (CPU_IS_LITTLE_ENDIAN)
+               len += (ffsw(v_word) >> 3);
+       else
+               len += (flsw(v_word) >> 3);
        return len;
 }