TEMPLATED(bt_matchfinder_advance_one_byte)(struct TEMPLATED(bt_matchfinder) * const restrict mf,
const u8 * const restrict in_begin,
const ptrdiff_t cur_pos,
- const u32 max_len,
- const u32 nice_len,
- const u32 max_search_depth,
+ const machine_word_t max_len,
+ const machine_word_t nice_len,
+ const machine_word_t max_search_depth,
u32 next_hashes[const restrict static 2],
u32 * const restrict best_len_ret,
struct lz_match * restrict lz_matchptr,
const bool record_matches)
{
const u8 *in_next = in_begin + cur_pos;
- u32 depth_remaining = max_search_depth;
- u32 next_seq4;
- u32 next_seq3;
- u32 hash3;
- u32 hash4;
+ machine_word_t depth_remaining = max_search_depth;
+ machine_word_t next_seq4;
+ machine_word_t next_seq3;
+ machine_word_t hash3;
+ machine_word_t hash4;
#ifdef BT_MATCHFINDER_HASH2_ORDER
- u16 seq2;
- u32 hash2;
+ machine_word_t seq2;
+ machine_word_t hash2;
#endif
STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
BT_MATCHFINDER_HASH3_WAYS <= 2);
- u32 cur_node;
+ machine_word_t cur_node;
#if BT_MATCHFINDER_HASH3_WAYS >= 2
- u32 cur_node_2;
+ machine_word_t cur_node_2;
#endif
const u8 *matchptr;
mf_pos_t *pending_lt_ptr, *pending_gt_ptr;
- u32 best_lt_len, best_gt_len;
- u32 len;
- u32 best_len = 3;
+ machine_word_t best_lt_len, best_gt_len;
+ machine_word_t len;
+ machine_word_t best_len = 3;
next_seq4 = load_u32_unaligned(in_next + 1);
next_seq3 = loaded_u32_to_u24(next_seq4);
TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) *mf,
const u8 *in_begin,
ptrdiff_t cur_pos,
- u32 max_len,
- u32 nice_len,
- u32 max_search_depth,
+ machine_word_t max_len,
+ machine_word_t nice_len,
+ machine_word_t max_search_depth,
u32 next_hashes[static 2],
u32 *best_len_ret,
struct lz_match *lz_matchptr)
TEMPLATED(bt_matchfinder_skip_position)(struct TEMPLATED(bt_matchfinder) *mf,
const u8 *in_begin,
ptrdiff_t cur_pos,
- u32 max_len,
- u32 nice_len,
- u32 max_search_depth,
+ machine_word_t max_len,
+ machine_word_t nice_len,
+ machine_word_t max_search_depth,
u32 next_hashes[static 2])
{
u32 best_len;
#ifndef _WIMLIB_LZ_EXTEND_H
#define _WIMLIB_LZ_EXTEND_H
+#include "wimlib/assert.h"
#include "wimlib/bitops.h"
#include "wimlib/unaligned.h"
/*
 * Return the number of bytes at @matchptr that match the bytes at @strptr, up
 * to a maximum of @max_len.  Initially, @start_len bytes are matched.
 *
 * This change widens the return type and the two length parameters from u32
 * to machine_word_t, and reworks the inline-assembly path in the #else branch
 * below.
 */
-static inline u32
+static inline machine_word_t
lz_extend(const u8 * const strptr, const u8 * const matchptr,
- const u32 start_len, const u32 max_len)
+ const machine_word_t start_len, const machine_word_t max_len)
{
#if 0
u32 len = start_len;
#else
u64 len = start_len;
/* Removed by this change: the old code saved the byte at strptr[max_len],
 * overwrote it with matchptr[max_len] + 1 so that the two buffers were
 * guaranteed to differ at offset max_len, and restored the saved byte after
 * the asm statement. */
- u8 saved = strptr[max_len];
- ((u8 *)strptr)[max_len] = matchptr[max_len] + 1;
/* Label 1 now heads the loop.  Each pass runs three identical steps: load 16
 * bytes from strptr + len, compare them against matchptr + len with
 * pcmpestri, and jump to label 2 if the carry flag is set; otherwise len
 * advances by 0x10.  Per the Intel SDM, imm8 0x18 selects an "equal each"
 * byte comparison with negated result, so carry signals a mismatch and ECX
 * receives the index of the first differing byte.
 * NOTE(review): pcmpestri also takes explicit lengths in EAX/EDX, and the
 * template uses %[strptr], %[matchptr], xmm0 and rcx, yet the only operand
 * visible here is [len] -- confirm the full operand and clobber lists. */
__asm__(
+ "1: \n"
" movdqu 0x0(%[strptr],%[len],1), %%xmm0 \n"
" pcmpestri $0x18, 0x0(%[matchptr],%[len],1), %%xmm0 \n"
" jc 2f \n"
- "1: \n"
" add $0x10, %[len] \n"
+
" movdqu 0x0(%[strptr],%[len],1), %%xmm0 \n"
" pcmpestri $0x18, 0x0(%[matchptr],%[len],1), %%xmm0 \n"
" jc 2f \n"
" add $0x10, %[len] \n"
+
" movdqu 0x0(%[strptr],%[len],1), %%xmm0 \n"
" pcmpestri $0x18, 0x0(%[matchptr],%[len],1), %%xmm0 \n"
- " jnc 1b \n"
+ " jc 2f \n"
+ " add $0x10, %[len] \n"
+
/* After three mismatch-free steps, loop again while len is below 257
 * (unsigned compare).  On fall-through rcx is zeroed so that the add at
 * label 2 leaves len unchanged.
 * NOTE(review): 257 is hard-coded -- presumably the largest match length
 * callers can request; confirm. */
+ " cmp $257, %[len] \n"
+ " jb 1b \n"
+ " xor %%rcx, %%rcx \n"
"2: \n"
" add %%rcx, %[len] \n"
: [len] "+r" (len)
);
- ((u8 *)strptr)[max_len] = saved;
-
- return len;
/* The asm is no longer stopped by a mismatching byte planted at max_len, so
 * len may overshoot max_len; clamp it.
 * NOTE(review): the 16-byte loads are not bounded by max_len, so this
 * presumably relies on readable slack past the end of both buffers --
 * confirm. */
+ return min(len, max_len);
#endif
}