From: Eric Biggers Date: Sun, 5 Jun 2016 00:07:15 +0000 (-0500) Subject: word X-Git-Url: https://wimlib.net/git/?a=commitdiff_plain;h=5f039b22331a64c1b5649894a177adaa2f0040e8;hp=9de12835a474880d76002a3107e4e170b1bff819;p=wimlib word --- diff --git a/include/wimlib/bt_matchfinder.h b/include/wimlib/bt_matchfinder.h index 6a2e7edf..e10281f2 100644 --- a/include/wimlib/bt_matchfinder.h +++ b/include/wimlib/bt_matchfinder.h @@ -145,35 +145,35 @@ static inline struct lz_match * TEMPLATED(bt_matchfinder_advance_one_byte)(struct TEMPLATED(bt_matchfinder) * const restrict mf, const u8 * const restrict in_begin, const ptrdiff_t cur_pos, - const u32 max_len, - const u32 nice_len, - const u32 max_search_depth, + const machine_word_t max_len, + const machine_word_t nice_len, + const machine_word_t max_search_depth, u32 next_hashes[const restrict static 2], u32 * const restrict best_len_ret, struct lz_match * restrict lz_matchptr, const bool record_matches) { const u8 *in_next = in_begin + cur_pos; - u32 depth_remaining = max_search_depth; - u32 next_seq4; - u32 next_seq3; - u32 hash3; - u32 hash4; + machine_word_t depth_remaining = max_search_depth; + machine_word_t next_seq4; + machine_word_t next_seq3; + machine_word_t hash3; + machine_word_t hash4; #ifdef BT_MATCHFINDER_HASH2_ORDER - u16 seq2; - u32 hash2; + machine_word_t seq2; + machine_word_t hash2; #endif STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 && BT_MATCHFINDER_HASH3_WAYS <= 2); - u32 cur_node; + machine_word_t cur_node; #if BT_MATCHFINDER_HASH3_WAYS >= 2 - u32 cur_node_2; + machine_word_t cur_node_2; #endif const u8 *matchptr; mf_pos_t *pending_lt_ptr, *pending_gt_ptr; - u32 best_lt_len, best_gt_len; - u32 len; - u32 best_len = 3; + machine_word_t best_lt_len, best_gt_len; + machine_word_t len; + machine_word_t best_len = 3; next_seq4 = load_u32_unaligned(in_next + 1); next_seq3 = loaded_u32_to_u24(next_seq4); @@ -328,9 +328,9 @@ static inline struct lz_match * TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) *mf, const u8 *in_begin, ptrdiff_t cur_pos, - u32 max_len, - u32 nice_len, - u32 max_search_depth, + machine_word_t max_len, + machine_word_t nice_len, + machine_word_t max_search_depth, u32 next_hashes[static 2], u32 *best_len_ret, struct lz_match *lz_matchptr) @@ -357,9 +357,9 @@ static inline void TEMPLATED(bt_matchfinder_skip_position)(struct TEMPLATED(bt_matchfinder) *mf, const u8 *in_begin, ptrdiff_t cur_pos, - u32 max_len, - u32 nice_len, - u32 max_search_depth, + machine_word_t max_len, + machine_word_t nice_len, + machine_word_t max_search_depth, u32 next_hashes[static 2]) { u32 best_len; diff --git a/include/wimlib/lz_extend.h b/include/wimlib/lz_extend.h index 3047ca21..d2f696c0 100644 --- a/include/wimlib/lz_extend.h +++ b/include/wimlib/lz_extend.h @@ -21,6 +21,7 @@ #ifndef _WIMLIB_LZ_EXTEND_H #define _WIMLIB_LZ_EXTEND_H +#include "wimlib/assert.h" #include "wimlib/bitops.h" #include "wimlib/unaligned.h" @@ -28,9 +29,9 @@ /* Return the number of bytes at @matchptr that match the bytes at @strptr, up * to a maximum of @max_len. Initially, @start_len bytes are matched. */ -static inline u32 +static inline machine_word_t lz_extend(const u8 * const strptr, const u8 * const matchptr, - const u32 start_len, const u32 max_len) + const machine_word_t start_len, const machine_word_t max_len) { #if 0 u32 len = start_len; @@ -48,22 +49,27 @@ lz_extend(const u8 * const strptr, const u8 * const matchptr, #else u64 len = start_len; - u8 saved = strptr[max_len]; - ((u8 *)strptr)[max_len] = matchptr[max_len] + 1; __asm__( + "1: \n" " movdqu 0x0(%[strptr],%[len],1), %%xmm0 \n" " pcmpestri $0x18, 0x0(%[matchptr],%[len],1), %%xmm0 \n" " jc 2f \n" - "1: \n" " add $0x10, %[len] \n" + " movdqu 0x0(%[strptr],%[len],1), %%xmm0 \n" " pcmpestri $0x18, 0x0(%[matchptr],%[len],1), %%xmm0 \n" " jc 2f \n" " add $0x10, %[len] \n" + " movdqu 0x0(%[strptr],%[len],1), %%xmm0 \n" " pcmpestri $0x18, 0x0(%[matchptr],%[len],1), %%xmm0 \n" - " jnc 1b \n" + " jc 2f \n" + " add $0x10, %[len] \n" + + " cmp $257, %[len] \n" + " jb 1b \n" + " xor %%rcx, %%rcx \n" "2: \n" " add %%rcx, %[len] \n" : [len] "+r" (len) @@ -72,9 +78,7 @@ lz_extend(const u8 * const strptr, const u8 * const matchptr, ); - ((u8 *)strptr)[max_len] = saved; - - return len; + return min(len, max_len); #endif } diff --git a/src/lzx_compress.c b/src/lzx_compress.c index e4668042..9c444917 100644 --- a/src/lzx_compress.c +++ b/src/lzx_compress.c @@ -2506,10 +2506,10 @@ lzx_create_compressor(size_t max_bufsize, unsigned compression_level, c->window_order = window_order; if (!c->destructive) { - c->in_buffer = MALLOC(max_bufsize + LZX_MAX_MATCH_LEN); + c->in_buffer = MALLOC(max_bufsize + LZX_MAX_MATCH_LEN*2); if (!c->in_buffer) goto oom1; - randomize_byte_array(&c->in_buffer[max_bufsize], LZX_MAX_MATCH_LEN); + randomize_byte_array(&c->in_buffer[max_bufsize], LZX_MAX_MATCH_LEN*2); } if (compression_level <= LZX_MAX_FAST_LEVEL) {