wimlib.net Git - wimlib/commitdiff
word
author Eric Biggers <ebiggers3@gmail.com>
Sun, 5 Jun 2016 00:07:15 +0000 (19:07 -0500)
committer Eric Biggers <ebiggers3@gmail.com>
Sun, 5 Jun 2016 00:07:15 +0000 (19:07 -0500)
include/wimlib/bt_matchfinder.h
include/wimlib/lz_extend.h
src/lzx_compress.c

index 6a2e7edf14ab589568c88269c2f9516821d4bea7..e10281f2fc893682b03574a6779b437ca56cc3c1 100644 (file)
@@ -145,35 +145,35 @@ static inline struct lz_match *
 TEMPLATED(bt_matchfinder_advance_one_byte)(struct TEMPLATED(bt_matchfinder) * const restrict mf,
                                           const u8 * const restrict in_begin,
                                           const ptrdiff_t cur_pos,
-                                          const u32 max_len,
-                                          const u32 nice_len,
-                                          const u32 max_search_depth,
+                                          const machine_word_t max_len,
+                                          const machine_word_t nice_len,
+                                          const machine_word_t max_search_depth,
                                           u32 next_hashes[const restrict static 2],
                                           u32 * const restrict best_len_ret,
                                           struct lz_match * restrict lz_matchptr,
                                           const bool record_matches)
 {
        const u8 *in_next = in_begin + cur_pos;
-       u32 depth_remaining = max_search_depth;
-       u32 next_seq4;
-       u32 next_seq3;
-       u32 hash3;
-       u32 hash4;
+       machine_word_t depth_remaining = max_search_depth;
+       machine_word_t next_seq4;
+       machine_word_t next_seq3;
+       machine_word_t hash3;
+       machine_word_t hash4;
 #ifdef BT_MATCHFINDER_HASH2_ORDER
-       u16 seq2;
-       u32 hash2;
+       machine_word_t seq2;
+       machine_word_t hash2;
 #endif
        STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
                      BT_MATCHFINDER_HASH3_WAYS <= 2);
-       u32 cur_node;
+       machine_word_t cur_node;
 #if BT_MATCHFINDER_HASH3_WAYS >= 2
-       u32 cur_node_2;
+       machine_word_t cur_node_2;
 #endif
        const u8 *matchptr;
        mf_pos_t *pending_lt_ptr, *pending_gt_ptr;
-       u32 best_lt_len, best_gt_len;
-       u32 len;
-       u32 best_len = 3;
+       machine_word_t best_lt_len, best_gt_len;
+       machine_word_t len;
+       machine_word_t best_len = 3;
 
        next_seq4 = load_u32_unaligned(in_next + 1);
        next_seq3 = loaded_u32_to_u24(next_seq4);
@@ -328,9 +328,9 @@ static inline struct lz_match *
 TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) *mf,
                                      const u8 *in_begin,
                                      ptrdiff_t cur_pos,
-                                     u32 max_len,
-                                     u32 nice_len,
-                                     u32 max_search_depth,
+                                     machine_word_t max_len,
+                                     machine_word_t nice_len,
+                                     machine_word_t max_search_depth,
                                      u32 next_hashes[static 2],
                                      u32 *best_len_ret,
                                      struct lz_match *lz_matchptr)
@@ -357,9 +357,9 @@ static inline void
 TEMPLATED(bt_matchfinder_skip_position)(struct TEMPLATED(bt_matchfinder) *mf,
                                        const u8 *in_begin,
                                        ptrdiff_t cur_pos,
-                                       u32 max_len,
-                                       u32 nice_len,
-                                       u32 max_search_depth,
+                                       machine_word_t max_len,
+                                       machine_word_t nice_len,
+                                       machine_word_t max_search_depth,
                                        u32 next_hashes[static 2])
 {
        u32 best_len;
index 3047ca21da31c710037a3cc2f6ae695cd6550a49..d2f696c06534bdeb8ecc6139ea0a6220e9540fd3 100644 (file)
@@ -21,6 +21,7 @@
 #ifndef _WIMLIB_LZ_EXTEND_H
 #define _WIMLIB_LZ_EXTEND_H
 
+#include "wimlib/assert.h"
 #include "wimlib/bitops.h"
 #include "wimlib/unaligned.h"
 
@@ -28,9 +29,9 @@
 
 /* Return the number of bytes at @matchptr that match the bytes at @strptr, up
  * to a maximum of @max_len.  Initially, @start_len bytes are matched.  */
-static inline u32
+static inline machine_word_t
 lz_extend(const u8 * const strptr, const u8 * const matchptr,
-         const u32 start_len, const u32 max_len)
+         const machine_word_t start_len, const machine_word_t max_len)
 {
 #if 0
        u32 len = start_len;
@@ -48,22 +49,27 @@ lz_extend(const u8 * const strptr, const u8 * const matchptr,
 #else
 
        u64 len = start_len;
-       u8 saved = strptr[max_len];
-       ((u8 *)strptr)[max_len] = matchptr[max_len] + 1;
 
        __asm__(
+               "1:                                          \n"
                "  movdqu 0x0(%[strptr],%[len],1), %%xmm0    \n"
                "  pcmpestri $0x18, 0x0(%[matchptr],%[len],1), %%xmm0    \n"
                "  jc 2f                                     \n"
-               "1:                                          \n"
                "  add $0x10, %[len]                         \n"
+
                "  movdqu 0x0(%[strptr],%[len],1), %%xmm0    \n"
                "  pcmpestri $0x18, 0x0(%[matchptr],%[len],1), %%xmm0    \n"
                "  jc 2f                                     \n"
                "  add $0x10, %[len]                         \n"
+
                "  movdqu 0x0(%[strptr],%[len],1), %%xmm0    \n"
                "  pcmpestri $0x18, 0x0(%[matchptr],%[len],1), %%xmm0    \n"
-               "  jnc 1b                                    \n"
+               "  jc 2f                                     \n"
+               "  add $0x10, %[len]                         \n"
+
+               "  cmp $257, %[len]                          \n"
+               "  jb 1b                                     \n"
+               "  xor %%rcx, %%rcx                          \n"
                "2:                                          \n"
                "  add %%rcx, %[len]                         \n"
                : [len] "+r" (len)
@@ -72,9 +78,7 @@ lz_extend(const u8 * const strptr, const u8 * const matchptr,
               );
 
 
-       ((u8 *)strptr)[max_len] = saved;
-
-       return len;
+       return min(len, max_len);
 #endif
 }
 
index e46680420b7c894861fcf8fd1f7f1f92cc547718..9c4449179a78c5dec825926dbff20ede89d8aa0f 100644 (file)
@@ -2506,10 +2506,10 @@ lzx_create_compressor(size_t max_bufsize, unsigned compression_level,
        c->window_order = window_order;
 
        if (!c->destructive) {
-               c->in_buffer = MALLOC(max_bufsize + LZX_MAX_MATCH_LEN);
+               c->in_buffer = MALLOC(max_bufsize + LZX_MAX_MATCH_LEN*2);
                if (!c->in_buffer)
                        goto oom1;
-               randomize_byte_array(&c->in_buffer[max_bufsize], LZX_MAX_MATCH_LEN);
+               randomize_byte_array(&c->in_buffer[max_bufsize], LZX_MAX_MATCH_LEN*2);
        }
 
        if (compression_level <= LZX_MAX_FAST_LEVEL) {