/*
- * lz_extend.h
+ * lz_extend.h - fast match extension for Lempel-Ziv matchfinding
*
- * Fast match extension for Lempel-Ziv matchfinding.
+ * The following copying information applies to this specific source code file:
*
- * The author dedicates this file to the public domain.
- * You can do whatever you want with this file.
+ * Written in 2014-2016 by Eric Biggers <ebiggers3@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
+ * Dedication (the "CC0").
+ *
+ * This software is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
+ *
+ * You should have received a copy of the CC0 along with this software; if not
+ * see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef _WIMLIB_LZ_EXTEND_H
#define _WIMLIB_LZ_EXTEND_H
-#include "wimlib/types.h"
-
-#if (defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__)
-# define HAVE_FAST_LZ_EXTEND 1
-#else
-# define HAVE_FAST_LZ_EXTEND 0
-#endif
+#include "wimlib/bitops.h"
+#include "wimlib/unaligned.h"
-/* Return the number of bytes at @matchptr that match the bytes at @strptr, up
- * to a maximum of @max_len. Initially, @start_len bytes are matched. */
+/*
+ * Return the number of bytes at @matchptr that match the bytes at @strptr, up
+ * to a maximum of @max_len. Initially, @len bytes are matched.
+ *
+ * While UNALIGNED_ACCESS_IS_FAST holds and at least WORDSIZE more bytes fit
+ * below @max_len, the two inputs are compared one machine word at a time; on
+ * the first differing word, the count of matching bytes within that word is
+ * derived from the XOR of the two words and the function returns immediately.
+ * Any bytes not handled by the word loop are compared one at a time.
+ */
static inline u32
lz_extend(const u8 * const strptr, const u8 * const matchptr,
- const u32 start_len, const u32 max_len)
+ u32 len, const u32 max_len)
{
- u32 len = start_len;
-
-#if HAVE_FAST_LZ_EXTEND
-
- while (len + sizeof(unsigned long) <= max_len) {
- unsigned long x;
-
- x = *(const unsigned long *)&matchptr[len] ^
- *(const unsigned long *)&strptr[len];
- if (x != 0)
- return len + (__builtin_ctzl(x) >> 3);
- len += sizeof(unsigned long);
- }
-
- if (sizeof(unsigned int) < sizeof(unsigned long) &&
- len + sizeof(unsigned int) <= max_len)
- {
- unsigned int x;
-
- x = *(const unsigned int *)&matchptr[len] ^
- *(const unsigned int *)&strptr[len];
- if (x != 0)
- return len + (__builtin_ctz(x) >> 3);
- len += sizeof(unsigned int);
- }
-
- if (sizeof(unsigned int) == 4) {
- if (len < max_len && matchptr[len] == strptr[len]) {
- len++;
- if (len < max_len && matchptr[len] == strptr[len]) {
- len++;
- if (len < max_len && matchptr[len] == strptr[len]) {
- len++;
- }
- }
+ while (UNALIGNED_ACCESS_IS_FAST && len + WORDSIZE <= max_len) { /* a whole word still fits below max_len */
+ machine_word_t v = load_word_unaligned(matchptr + len) ^
+ load_word_unaligned(strptr + len); /* v is zero iff the two words are identical */
+ if (v != 0) { /* mismatch somewhere inside this word */
+ if (CPU_IS_LITTLE_ENDIAN)
+ len += ffsw(v) >> 3; /* NOTE(review): presumably ffsw() is the 0-based index of the lowest set bit (it replaces __builtin_ctzl); >> 3 turns bits into bytes -- confirm in bitops.h */
+ else
+ len += (8 * WORDSIZE - 1 - flsw(v)) >> 3; /* NOTE(review): presumably flsw() is the 0-based index of the highest set bit, making this the number of leading equal bytes -- confirm in bitops.h */
+ return len;
}
- return len;
+ len += WORDSIZE; /* whole word matched; advance to the next one */
}
-#endif /* HAVE_FAST_LZ_EXTEND */
-
while (len < max_len && matchptr[len] == strptr[len]) /* byte-at-a-time comparison for whatever the word loop did not cover */
len++;
-
return len;
}