#include "wimlib/bitops.h"
#include "wimlib/unaligned.h"
+#include <smmintrin.h>
+
/* Return the number of bytes at @matchptr that match the bytes at @strptr, up
* to a maximum of @max_len. Initially, @start_len bytes are matched. */
static inline u32
lz_extend(const u8 * const strptr, const u8 * const matchptr,
const u32 start_len, const u32 max_len)
{
+#if 0 /* compiled out: portable word-at-a-time variant; the #else branch below is the active one */
u32 len = start_len;
machine_word_t v_word;
- if (UNALIGNED_ACCESS_IS_FAST) {
+ for (;;) { /* compare one machine word per iteration */
+ v_word = load_word_unaligned(&matchptr[len]) ^
+ load_word_unaligned(&strptr[len]); /* nonzero iff the words differ; loaded before max_len is checked, so bytes past max_len may be read */
+ if (v_word != 0 || len >= max_len) /* NOTE(review): can exit with v_word == 0, and ffsw(0) is then evaluated below - confirm ffsw accepts 0 */
+ break;
+ len += WORDSIZE;
+ }
+
+ return min(max_len, len + (ffsw(v_word) >> 3)); /* >> 3 turns a bit index into a byte count; clamp to max_len. NOTE(review): little-endian only - the removed code had a flsw() branch for big-endian */
+#else
- if (likely(max_len - len >= 4 * WORDSIZE)) {
+ const u8 *p1 = strptr + start_len; /* scan cursor in the string, skipping the already-matched prefix */
+ const u8 *p2 = matchptr + start_len; /* scan cursor in the match, advanced in lockstep with p1 */
- #define COMPARE_WORD_STEP \
- v_word = load_word_unaligned(&matchptr[len]) ^ \
- load_word_unaligned(&strptr[len]); \
- if (v_word != 0) \
- goto word_differs; \
- len += WORDSIZE; \
+ u8 saved = strptr[max_len]; /* remember the byte at index max_len; it is restored after the asm */
+ ((u8 *)strptr)[max_len] = matchptr[max_len] + 1; /* sentinel: make the two bytes at index max_len differ. NOTE(review): casts away const, mutates the caller's buffer, and touches index max_len of both buffers */
- COMPARE_WORD_STEP
- COMPARE_WORD_STEP
- COMPARE_WORD_STEP
- COMPARE_WORD_STEP
- #undef COMPARE_WORD_STEP
- }
+ __asm__(
+ " movdqu (%[p1]), %%xmm0 \n" /* unaligned load of 16 bytes from p1 */
+ " pcmpestri $0x18, (%[p2]), %%xmm0 \n" /* imm 0x18 = byte-wise "equal each", negated: CF set on a mismatch, ECX = index of first mismatch */
+ " jc 2f \n" /* mismatch inside the first 16 bytes: skip the loop */
+ "1: \n"
+ " add $0x10, %[p1] \n" /* advance both cursors by one 16-byte chunk */
+ " add $0x10, %[p2] \n"
+ " movdqu (%[p1]), %%xmm0 \n"
+ " pcmpestri $0x18, (%[p2]), %%xmm0 \n"
+ " jnc 1b \n" /* no mismatch in this chunk: keep scanning; no length check here, termination relies on the sentinel */
+ "2: \n"
+ " add %%rcx, %[p1] \n" /* step to the first mismatching byte. NOTE(review): %rcx makes this x86-64 only, and pcmpestri needs SSE4.2 */
+ " add %%rcx, %[p2] \n"
+ : [p1] "+r" (p1), [p2] "+r" (p2) /* both cursors are read and written by the asm */
+ : "a" (16), "d" (16) /* EAX/EDX: explicit operand lengths for pcmpestri, 16 bytes each */
+ : "rcx", "cc", "xmm0", "memory" /* rcx and flags are written by pcmpestri, xmm0 by movdqu */
+ );
- while (len + WORDSIZE <= max_len) {
- v_word = load_word_unaligned(&matchptr[len]) ^
- load_word_unaligned(&strptr[len]);
- if (v_word != 0)
- goto word_differs;
- len += WORDSIZE;
- }
- }
- while (len < max_len && matchptr[len] == strptr[len])
- len++;
- return len;
+ ((u8 *)strptr)[max_len] = saved; /* undo the sentinel write */
-word_differs:
- if (CPU_IS_LITTLE_ENDIAN)
- len += (ffsw(v_word) >> 3);
- else
- len += (8 * WORDSIZE - 1 - flsw(v_word)) >> 3;
- return len;
+ return p1 - strptr; /* match length = distance scanned from strptr; NOTE(review): pointer difference is implicitly narrowed to u32 */
+#endif
}
#endif /* _WIMLIB_LZ_EXTEND_H */
{
u64 size = 0;
+ destructive = false;
+
if (max_bufsize > LZX_MAX_WINDOW_SIZE)
return 0;
size += lzx_get_compressor_size(max_bufsize, compression_level);
if (!destructive)
- size += max_bufsize; /* in_buffer */
+ size += max_bufsize + LZX_MAX_MATCH_LEN; /* in_buffer */
return size;
}
unsigned window_order;
struct lzx_compressor *c;
+ destructive = false;
+
window_order = lzx_get_window_order(max_bufsize);
if (window_order == 0)
return WIMLIB_ERR_INVALID_PARAM;
c->window_order = window_order;
if (!c->destructive) {
- c->in_buffer = MALLOC(max_bufsize);
+ c->in_buffer = MALLOC(max_bufsize + LZX_MAX_MATCH_LEN);
if (!c->in_buffer)
goto oom1;
+ randomize_byte_array(&c->in_buffer[max_bufsize], LZX_MAX_MATCH_LEN);
}
if (compression_level <= LZX_MAX_FAST_LEVEL) {