Get rid of matchfinder_common.h and manual memsets
authorEric Biggers <ebiggers3@gmail.com>
Sat, 19 Sep 2015 18:55:59 +0000 (13:55 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Fri, 25 Sep 2015 01:41:34 +0000 (20:41 -0500)
Makefile.am
include/wimlib/bt_matchfinder.h
include/wimlib/hc_matchfinder.h
include/wimlib/matchfinder_avx2.h [deleted file]
include/wimlib/matchfinder_common.h [deleted file]
include/wimlib/matchfinder_sse2.h [deleted file]
src/lzx_compress.c
src/xpress_compress.c

index b01b520..cb527b7 100644 (file)
@@ -129,9 +129,6 @@ libwim_la_SOURCES =         \
        include/wimlib/lzms_constants.h \
        include/wimlib/lzx_common.h     \
        include/wimlib/lzx_constants.h  \
-       include/wimlib/matchfinder_avx2.h       \
-       include/wimlib/matchfinder_common.h     \
-       include/wimlib/matchfinder_sse2.h       \
        include/wimlib/metadata.h       \
        include/wimlib/pathlist.h       \
        include/wimlib/paths.h          \
index 4fe754c..189c5a1 100644 (file)
 #ifndef _BT_MATCHFINDER_H
 #define _BT_MATCHFINDER_H
 
+#ifndef MATCHFINDER_MAX_WINDOW_ORDER
+#  error "MATCHFINDER_MAX_WINDOW_ORDER must be defined!"
+#endif
+
+#include <string.h>
+
 #include "wimlib/lz_extend.h"
 #include "wimlib/lz_hash.h"
-#include "wimlib/matchfinder_common.h"
 
 #if MATCHFINDER_MAX_WINDOW_ORDER < 13
 #  define BT_MATCHFINDER_HASH_ORDER 14
 #  define BT_MATCHFINDER_HASH_ORDER 16
 #endif
 
-#define BT_MATCHFINDER_HASH_LENGTH     (1UL << BT_MATCHFINDER_HASH_ORDER)
+#if MATCHFINDER_MAX_WINDOW_ORDER <= 16
+typedef u16 pos_t;
+#else
+typedef u32 pos_t;
+#endif
+
+/* Representation of a match found by the bt_matchfinder  */
+struct lz_match {
+
+       /* The number of bytes matched.  */
+       pos_t length;
+
+       /* The offset back from the current position that was matched.  */
+       pos_t offset;
+};
 
 struct bt_matchfinder {
-       pos_t hash_tab[BT_MATCHFINDER_HASH_LENGTH];
+       pos_t hash_tab[1UL << BT_MATCHFINDER_HASH_ORDER];
        pos_t child_tab[];
-} _aligned_attribute(MATCHFINDER_ALIGNMENT);
+};
 
 /* Return the number of bytes that must be allocated for a 'bt_matchfinder' that
  * can work with buffers up to the specified size.  */
 static inline size_t
 bt_matchfinder_size(size_t max_bufsize)
 {
-       return sizeof(pos_t) * (BT_MATCHFINDER_HASH_LENGTH + (2 * max_bufsize));
+       return sizeof(struct bt_matchfinder) + (2 * max_bufsize * sizeof(pos_t));
 }
 
 /* Prepare the matchfinder for a new input buffer.  */
 static inline void
 bt_matchfinder_init(struct bt_matchfinder *mf)
 {
-       matchfinder_init(mf->hash_tab, BT_MATCHFINDER_HASH_LENGTH);
+       memset(mf, 0, sizeof(*mf));
 }
 
 static inline u32
@@ -186,9 +205,9 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
        best_gt_len = 0;
        len = 0;
 
-       if (!matchfinder_node_valid(cur_node)) {
-               *pending_lt_ptr = MATCHFINDER_NULL;
-               *pending_gt_ptr = MATCHFINDER_NULL;
+       if (!cur_node) {
+               *pending_lt_ptr = 0;
+               *pending_gt_ptr = 0;
                *best_len_ret = best_len;
                return lz_matchptr;
        }
@@ -228,9 +247,9 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
                                len = best_lt_len;
                }
 
-               if (!matchfinder_node_valid(cur_node) || !--depth_remaining) {
-                       *pending_lt_ptr = MATCHFINDER_NULL;
-                       *pending_gt_ptr = MATCHFINDER_NULL;
+               if (!cur_node || !--depth_remaining) {
+                       *pending_lt_ptr = 0;
+                       *pending_gt_ptr = 0;
                        *best_len_ret = best_len;
                        return lz_matchptr;
                }
@@ -294,9 +313,9 @@ bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
        best_gt_len = 0;
        len = 0;
 
-       if (!matchfinder_node_valid(cur_node)) {
-               *pending_lt_ptr = MATCHFINDER_NULL;
-               *pending_gt_ptr = MATCHFINDER_NULL;
+       if (!cur_node) {
+               *pending_lt_ptr = 0;
+               *pending_gt_ptr = 0;
                return;
        }
 
@@ -328,9 +347,9 @@ bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
                                len = best_lt_len;
                }
 
-               if (!matchfinder_node_valid(cur_node) || !--depth_remaining) {
-                       *pending_lt_ptr = MATCHFINDER_NULL;
-                       *pending_gt_ptr = MATCHFINDER_NULL;
+               if (!cur_node || !--depth_remaining) {
+                       *pending_lt_ptr = 0;
+                       *pending_gt_ptr = 0;
                        return;
                }
        }
index 4c2c027..eb509d9 100644 (file)
@@ -54,9 +54,8 @@
  * (and therefore reduced cache pressure), the code only uses 32-bit integers if
  * they are needed to represent all possible positions.
  *
- * You must allocate the 'struct hc_matchfinder' on a
- * MATCHFINDER_ALIGNMENT-aligned boundary, and its necessary allocation size
- * must be gotten by calling hc_matchfinder_size().
+ * The number of bytes that must be allocated for a given 'struct
+ * hc_matchfinder' must be gotten by calling hc_matchfinder_size().
  *
  * ----------------------------------------------------------------------------
  *
 #ifndef _HC_MATCHFINDER_H
 #define _HC_MATCHFINDER_H
 
+#ifndef MATCHFINDER_MAX_WINDOW_ORDER
+#  error "MATCHFINDER_MAX_WINDOW_ORDER must be defined!"
+#endif
+
+#include <string.h>
+
 #include "wimlib/lz_extend.h"
 #include "wimlib/lz_hash.h"
-#include "wimlib/matchfinder_common.h"
 #include "wimlib/unaligned.h"
 
 #if MATCHFINDER_MAX_WINDOW_ORDER < 14
 #  define HC_MATCHFINDER_HASH_ORDER 15
 #endif
 
-#define HC_MATCHFINDER_HASH_LENGTH     (1UL << HC_MATCHFINDER_HASH_ORDER)
+#if MATCHFINDER_MAX_WINDOW_ORDER <= 16
+typedef u16 pos_t;
+#else
+typedef u32 pos_t;
+#endif
 
 struct hc_matchfinder {
-       pos_t hash_tab[HC_MATCHFINDER_HASH_LENGTH];
+       pos_t hash_tab[1UL << HC_MATCHFINDER_HASH_ORDER];
        pos_t next_tab[];
-} _aligned_attribute(MATCHFINDER_ALIGNMENT);
+};
 
 /* Return the number of bytes that must be allocated for a 'hc_matchfinder' that
  * can work with buffers up to the specified size.  */
 static inline size_t
 hc_matchfinder_size(size_t max_bufsize)
 {
-       return sizeof(pos_t) * (HC_MATCHFINDER_HASH_LENGTH + max_bufsize);
+       return sizeof(struct hc_matchfinder) + (max_bufsize * sizeof(pos_t));
 }
 
 /* Prepare the matchfinder for a new input buffer.  */
 static inline void
 hc_matchfinder_init(struct hc_matchfinder *mf)
 {
-       matchfinder_init(mf->hash_tab, HC_MATCHFINDER_HASH_LENGTH);
+       memset(mf, 0, sizeof(*mf));
 }
 
 /*
@@ -186,7 +194,7 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
 
        /* Search the appropriate linked list for matches.  */
 
-       if (!(matchfinder_node_valid(cur_node)))
+       if (!cur_node)
                goto out;
 
        if (best_len < 3) {
@@ -200,7 +208,7 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
 
                        /* The first 3 bytes did not match.  Keep trying.  */
                        cur_node = mf->next_tab[cur_node];
-                       if (!matchfinder_node_valid(cur_node) || !--depth_remaining)
+                       if (!cur_node || !--depth_remaining)
                                goto out;
                }
 
@@ -210,7 +218,7 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
                if (best_len >= nice_len)
                        goto out;
                cur_node = mf->next_tab[cur_node];
-               if (!matchfinder_node_valid(cur_node) || !--depth_remaining)
+               if (!cur_node || !--depth_remaining)
                        goto out;
        }
 
@@ -234,7 +242,7 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
                                break;
 
                        cur_node = mf->next_tab[cur_node];
-                       if (!matchfinder_node_valid(cur_node) || !--depth_remaining)
+                       if (!cur_node || !--depth_remaining)
                                goto out;
                }
 
@@ -251,7 +259,7 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
                                goto out;
                }
                cur_node = mf->next_tab[cur_node];
-               if (!matchfinder_node_valid(cur_node) || !--depth_remaining)
+               if (!cur_node || !--depth_remaining)
                        goto out;
        }
 out:
diff --git a/include/wimlib/matchfinder_avx2.h b/include/wimlib/matchfinder_avx2.h
deleted file mode 100644 (file)
index bdf10d2..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * matchfinder_avx2.h
- *
- * Matchfinding routines optimized for Intel AVX2 (Advanced Vector Extensions).
- *
- * Author:     Eric Biggers
- * Year:       2014, 2015
- *
- * The author dedicates this file to the public domain.
- * You can do whatever you want with this file.
- */
-
-#include <immintrin.h>
-
-static inline bool
-matchfinder_init_avx2(pos_t *data, size_t size)
-{
-       __m256i v, *p;
-       size_t n;
-
-       if (size % sizeof(__m256i) * 4)
-               return false;
-
-       if (sizeof(pos_t) == 2)
-               v = _mm256_set1_epi16((u16)MATCHFINDER_NULL);
-       else if (sizeof(pos_t) == 4)
-               v = _mm256_set1_epi32((u32)MATCHFINDER_NULL);
-       else
-               return false;
-
-       p = (__m256i *)data;
-       n = size / (sizeof(__m256i) * 4);
-       do {
-               p[0] = v;
-               p[1] = v;
-               p[2] = v;
-               p[3] = v;
-               p += 4;
-       } while (--n);
-       return true;
-}
diff --git a/include/wimlib/matchfinder_common.h b/include/wimlib/matchfinder_common.h
deleted file mode 100644 (file)
index 2372bd6..0000000
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * matchfinder_common.h
- *
- * Common code for Lempel-Ziv matchfinding.
- *
- * Author:     Eric Biggers
- * Year:       2014, 2015
- *
- * The author dedicates this file to the public domain.
- * You can do whatever you want with this file.
- */
-
-#ifndef _MATCHFINDER_COMMON_H
-#define _MATCHFINDER_COMMON_H
-
-#include <string.h>
-
-#include "wimlib/types.h"
-
-#ifndef MATCHFINDER_MAX_WINDOW_ORDER
-#  error "MATCHFINDER_MAX_WINDOW_ORDER must be defined!"
-#endif
-
-#if MATCHFINDER_MAX_WINDOW_ORDER <= 16
-typedef u16 pos_t;
-#else
-typedef u32 pos_t;
-#endif
-
-#if MATCHFINDER_MAX_WINDOW_ORDER != 16 && MATCHFINDER_MAX_WINDOW_ORDER != 32
-
-/* Not all the bits of the position type are needed, so the sign bit can be
- * reserved to mean "out of bounds".  */
-#define MATCHFINDER_NULL ((pos_t)-1)
-
-static inline bool
-matchfinder_node_valid(pos_t node)
-{
-       return !(node & ((pos_t)1 << (sizeof(pos_t) * 8 - 1)));
-}
-
-#else
-
-/* All bits of the position type are needed, so use 0 to mean "out of bounds".
- * This prevents the beginning of the buffer from matching anything; however,
- * this doesn't matter much.  */
-
-#define MATCHFINDER_NULL ((pos_t)0)
-
-static inline bool
-matchfinder_node_valid(pos_t node)
-{
-       return node != 0;
-}
-
-#endif
-
-#define MATCHFINDER_ALIGNMENT 8
-
-#ifdef __AVX2__
-#  include "matchfinder_avx2.h"
-#  if MATCHFINDER_ALIGNMENT < 32
-#    undef MATCHFINDER_ALIGNMENT
-#    define MATCHFINDER_ALIGNMENT 32
-#  endif
-#endif
-
-#ifdef __SSE2__
-#  include "matchfinder_sse2.h"
-#  if MATCHFINDER_ALIGNMENT < 16
-#    undef MATCHFINDER_ALIGNMENT
-#    define MATCHFINDER_ALIGNMENT 16
-#  endif
-#endif
-
-/*
- * Representation of a match.
- */
-struct lz_match {
-
-       /* The number of bytes matched.  */
-       pos_t length;
-
-       /* The offset back from the current position that was matched.  */
-       pos_t offset;
-};
-
-static inline bool
-matchfinder_memset_init_okay(void)
-{
-       /* All bytes must match in order to use memset.  */
-       const pos_t v = MATCHFINDER_NULL;
-       if (sizeof(pos_t) == 2)
-               return (u8)v == (u8)(v >> 8);
-       if (sizeof(pos_t) == 4)
-               return (u8)v == (u8)(v >> 8) &&
-                      (u8)v == (u8)(v >> 16) &&
-                      (u8)v == (u8)(v >> 24);
-       return false;
-}
-
-/*
- * Initialize the hash table portion of the matchfinder.
- *
- * Essentially, this is an optimized memset().
- *
- * 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary.
- */
-static inline void
-matchfinder_init(pos_t *data, size_t num_entries)
-{
-       const size_t size = num_entries * sizeof(data[0]);
-
-#ifdef __AVX2__
-       if (matchfinder_init_avx2(data, size))
-               return;
-#endif
-
-#ifdef __SSE2__
-       if (matchfinder_init_sse2(data, size))
-               return;
-#endif
-
-       if (matchfinder_memset_init_okay()) {
-               memset(data, (u8)MATCHFINDER_NULL, size);
-               return;
-       }
-
-       for (size_t i = 0; i < num_entries; i++)
-               data[i] = MATCHFINDER_NULL;
-}
-
-#endif /* _MATCHFINDER_COMMON_H */
diff --git a/include/wimlib/matchfinder_sse2.h b/include/wimlib/matchfinder_sse2.h
deleted file mode 100644 (file)
index 9b0b080..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * matchfinder_sse2.h
- *
- * Matchfinding routines optimized for Intel SSE2 (Streaming SIMD Extensions).
- *
- * Author:     Eric Biggers
- * Year:       2014, 2015
- *
- * The author dedicates this file to the public domain.
- * You can do whatever you want with this file.
- */
-
-#include <emmintrin.h>
-
-static inline bool
-matchfinder_init_sse2(pos_t *data, size_t size)
-{
-       __m128i v, *p;
-       size_t n;
-
-       if (size % sizeof(__m128i) * 4)
-               return false;
-
-       if (sizeof(pos_t) == 2)
-               v = _mm_set1_epi16((u16)MATCHFINDER_NULL);
-       else if (sizeof(pos_t) == 4)
-               v = _mm_set1_epi32((u32)MATCHFINDER_NULL);
-       else
-               return false;
-
-       p = (__m128i *)data;
-       n = size / (sizeof(__m128i) * 4);
-       do {
-               p[0] = v;
-               p[1] = v;
-               p[2] = v;
-               p[3] = v;
-               p += 4;
-       } while (--n);
-       return true;
-}
index 339f27f..5e1be48 100644 (file)
@@ -466,8 +466,7 @@ struct lzx_compressor {
                                                    LZX_MAX_MATCH_LEN - 1];
 
                        /* Hash table for finding length 2 matches  */
-                       pos_t hash2_tab[LZX_HASH2_LENGTH]
-                               _aligned_attribute(MATCHFINDER_ALIGNMENT);
+                       pos_t hash2_tab[LZX_HASH2_LENGTH];
 
                        /* Binary trees matchfinder (MUST BE LAST!!!)  */
                        struct bt_matchfinder bt_mf;
@@ -1600,7 +1599,7 @@ lzx_compress_near_optimal(struct lzx_compressor *c,
        struct lzx_lru_queue queue;
 
        bt_matchfinder_init(&c->bt_mf);
-       matchfinder_init(c->hash2_tab, LZX_HASH2_LENGTH);
+       memset(c->hash2_tab, 0, sizeof(c->hash2_tab));
        next_hash = bt_matchfinder_hash_3_bytes(in_next);
        lzx_lru_queue_init(&queue);
 
@@ -1643,7 +1642,7 @@ lzx_compress_near_optimal(struct lzx_compressor *c,
                        hash2 = lz_hash_2_bytes(in_next, LZX_HASH2_ORDER);
                        cur_match = c->hash2_tab[hash2];
                        c->hash2_tab[hash2] = in_next - in_begin;
-                       if (matchfinder_node_valid(cur_match) &&
+                       if (cur_match != 0 &&
                            (LZX_HASH2_ORDER == 16 ||
                             load_u16_unaligned(&in_begin[cur_match]) ==
                             load_u16_unaligned(in_next)))
@@ -2037,9 +2036,7 @@ lzx_create_compressor(size_t max_bufsize, unsigned compression_level,
        if (window_order == 0)
                return WIMLIB_ERR_INVALID_PARAM;
 
-       c = ALIGNED_MALLOC(lzx_get_compressor_size(max_bufsize,
-                                                  compression_level),
-                          MATCHFINDER_ALIGNMENT);
+       c = MALLOC(lzx_get_compressor_size(max_bufsize, compression_level));
        if (!c)
                goto oom0;
 
@@ -2114,7 +2111,7 @@ lzx_create_compressor(size_t max_bufsize, unsigned compression_level,
        return 0;
 
 oom1:
-       ALIGNED_FREE(c);
+       FREE(c);
 oom0:
        return WIMLIB_ERR_NOMEM;
 }
@@ -2163,7 +2160,7 @@ lzx_free_compressor(void *_c)
 
        if (!c->destructive)
                FREE(c->in_buffer);
-       ALIGNED_FREE(c);
+       FREE(c);
 }
 
 const struct compressor_ops lzx_compressor_ops = {
index 6750160..2f3a35e 100644 (file)
@@ -1068,8 +1068,7 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
        if (max_bufsize > XPRESS_MAX_BUFSIZE)
                return WIMLIB_ERR_INVALID_PARAM;
 
-       c = ALIGNED_MALLOC(xpress_get_compressor_size(max_bufsize, compression_level),
-                          MATCHFINDER_ALIGNMENT);
+       c = MALLOC(xpress_get_compressor_size(max_bufsize, compression_level));
        if (!c)
                goto oom0;
 
@@ -1129,7 +1128,7 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
        return 0;
 
 oom1:
-       ALIGNED_FREE(c);
+       FREE(c);
 oom0:
        return WIMLIB_ERR_NOMEM;
 }
@@ -1164,7 +1163,7 @@ xpress_free_compressor(void *_c)
        } else
 #endif
                FREE(c->chosen_items);
-       ALIGNED_FREE(c);
+       FREE(c);
 }
 
 const struct compressor_ops xpress_compressor_ops = {