From 80c2fe3e6463cfd0eca5bead23a08731b6db9576 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 19 Sep 2015 13:55:59 -0500 Subject: [PATCH] Get rid of matchfinder_common.h and manual memsets --- Makefile.am | 3 - include/wimlib/bt_matchfinder.h | 55 ++++++++---- include/wimlib/hc_matchfinder.h | 36 +++++--- include/wimlib/matchfinder_avx2.h | 41 --------- include/wimlib/matchfinder_common.h | 133 ---------------------------- include/wimlib/matchfinder_sse2.h | 41 --------- src/lzx_compress.c | 15 ++-- src/xpress_compress.c | 7 +- 8 files changed, 68 insertions(+), 263 deletions(-) delete mode 100644 include/wimlib/matchfinder_avx2.h delete mode 100644 include/wimlib/matchfinder_common.h delete mode 100644 include/wimlib/matchfinder_sse2.h diff --git a/Makefile.am b/Makefile.am index b01b520a..cb527b77 100644 --- a/Makefile.am +++ b/Makefile.am @@ -129,9 +129,6 @@ libwim_la_SOURCES = \ include/wimlib/lzms_constants.h \ include/wimlib/lzx_common.h \ include/wimlib/lzx_constants.h \ - include/wimlib/matchfinder_avx2.h \ - include/wimlib/matchfinder_common.h \ - include/wimlib/matchfinder_sse2.h \ include/wimlib/metadata.h \ include/wimlib/pathlist.h \ include/wimlib/paths.h \ diff --git a/include/wimlib/bt_matchfinder.h b/include/wimlib/bt_matchfinder.h index 4fe754c9..189c5a15 100644 --- a/include/wimlib/bt_matchfinder.h +++ b/include/wimlib/bt_matchfinder.h @@ -48,9 +48,14 @@ #ifndef _BT_MATCHFINDER_H #define _BT_MATCHFINDER_H +#ifndef MATCHFINDER_MAX_WINDOW_ORDER +# error "MATCHFINDER_MAX_WINDOW_ORDER must be defined!" +#endif + +#include + #include "wimlib/lz_extend.h" #include "wimlib/lz_hash.h" -#include "wimlib/matchfinder_common.h" #if MATCHFINDER_MAX_WINDOW_ORDER < 13 # define BT_MATCHFINDER_HASH_ORDER 14 @@ -60,26 +65,40 @@ # define BT_MATCHFINDER_HASH_ORDER 16 #endif -#define BT_MATCHFINDER_HASH_LENGTH (1UL << BT_MATCHFINDER_HASH_ORDER) +#if MATCHFINDER_MAX_WINDOW_ORDER <= 16 +typedef u16 pos_t; +#else +typedef u32 pos_t; +#endif + +/* Representation of a match found by the bt_matchfinder */ +struct lz_match { + + /* The number of bytes matched. */ + pos_t length; + + /* The offset back from the current position that was matched. */ + pos_t offset; +}; struct bt_matchfinder { - pos_t hash_tab[BT_MATCHFINDER_HASH_LENGTH]; + pos_t hash_tab[1UL << BT_MATCHFINDER_HASH_ORDER]; pos_t child_tab[]; -} _aligned_attribute(MATCHFINDER_ALIGNMENT); +}; /* Return the number of bytes that must be allocated for a 'bt_matchfinder' that * can work with buffers up to the specified size. */ static inline size_t bt_matchfinder_size(size_t max_bufsize) { - return sizeof(pos_t) * (BT_MATCHFINDER_HASH_LENGTH + (2 * max_bufsize)); + return sizeof(struct bt_matchfinder) + (2 * max_bufsize * sizeof(pos_t)); } /* Prepare the matchfinder for a new input buffer. */ static inline void bt_matchfinder_init(struct bt_matchfinder *mf) { - matchfinder_init(mf->hash_tab, BT_MATCHFINDER_HASH_LENGTH); + memset(mf, 0, sizeof(*mf)); } static inline u32 @@ -186,9 +205,9 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf, best_gt_len = 0; len = 0; - if (!matchfinder_node_valid(cur_node)) { - *pending_lt_ptr = MATCHFINDER_NULL; - *pending_gt_ptr = MATCHFINDER_NULL; + if (!cur_node) { + *pending_lt_ptr = 0; + *pending_gt_ptr = 0; *best_len_ret = best_len; return lz_matchptr; } @@ -228,9 +247,9 @@ bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf, len = best_lt_len; } - if (!matchfinder_node_valid(cur_node) || !--depth_remaining) { - *pending_lt_ptr = MATCHFINDER_NULL; - *pending_gt_ptr = MATCHFINDER_NULL; + if (!cur_node || !--depth_remaining) { + *pending_lt_ptr = 0; + *pending_gt_ptr = 0; *best_len_ret = best_len; return lz_matchptr; } @@ -294,9 +313,9 @@ bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf, best_gt_len = 0; len = 0; - if (!matchfinder_node_valid(cur_node)) { - *pending_lt_ptr = MATCHFINDER_NULL; - *pending_gt_ptr = MATCHFINDER_NULL; + if (!cur_node) { + *pending_lt_ptr = 0; + *pending_gt_ptr = 0; return; } @@ -328,9 +347,9 @@ bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf, len = best_lt_len; } - if (!matchfinder_node_valid(cur_node) || !--depth_remaining) { - *pending_lt_ptr = MATCHFINDER_NULL; - *pending_gt_ptr = MATCHFINDER_NULL; + if (!cur_node || !--depth_remaining) { + *pending_lt_ptr = 0; + *pending_gt_ptr = 0; return; } } diff --git a/include/wimlib/hc_matchfinder.h b/include/wimlib/hc_matchfinder.h index 4c2c0271..eb509d9a 100644 --- a/include/wimlib/hc_matchfinder.h +++ b/include/wimlib/hc_matchfinder.h @@ -54,9 +54,8 @@ * (and therefore reduced cache pressure), the code only uses 32-bit integers if * they are needed to represent all possible positions. * - * You must allocate the 'struct hc_matchfinder' on a - * MATCHFINDER_ALIGNMENT-aligned boundary, and its necessary allocation size - * must be gotten by calling hc_matchfinder_size(). + * The number of bytes that must be allocated for a given 'struct + * hc_matchfinder' must be gotten by calling hc_matchfinder_size(). * * ---------------------------------------------------------------------------- * @@ -96,9 +95,14 @@ #ifndef _HC_MATCHFINDER_H #define _HC_MATCHFINDER_H +#ifndef MATCHFINDER_MAX_WINDOW_ORDER +# error "MATCHFINDER_MAX_WINDOW_ORDER must be defined!" +#endif + +#include + #include "wimlib/lz_extend.h" #include "wimlib/lz_hash.h" -#include "wimlib/matchfinder_common.h" #include "wimlib/unaligned.h" #if MATCHFINDER_MAX_WINDOW_ORDER < 14 @@ -107,26 +111,30 @@ # define HC_MATCHFINDER_HASH_ORDER 15 #endif -#define HC_MATCHFINDER_HASH_LENGTH (1UL << HC_MATCHFINDER_HASH_ORDER) +#if MATCHFINDER_MAX_WINDOW_ORDER <= 16 +typedef u16 pos_t; +#else +typedef u32 pos_t; +#endif struct hc_matchfinder { - pos_t hash_tab[HC_MATCHFINDER_HASH_LENGTH]; + pos_t hash_tab[1UL << HC_MATCHFINDER_HASH_ORDER]; pos_t next_tab[]; -} _aligned_attribute(MATCHFINDER_ALIGNMENT); +}; /* Return the number of bytes that must be allocated for a 'hc_matchfinder' that * can work with buffers up to the specified size. */ static inline size_t hc_matchfinder_size(size_t max_bufsize) { - return sizeof(pos_t) * (HC_MATCHFINDER_HASH_LENGTH + max_bufsize); + return sizeof(struct hc_matchfinder) + (max_bufsize * sizeof(pos_t)); } /* Prepare the matchfinder for a new input buffer. */ static inline void hc_matchfinder_init(struct hc_matchfinder *mf) { - matchfinder_init(mf->hash_tab, HC_MATCHFINDER_HASH_LENGTH); + memset(mf, 0, sizeof(*mf)); } /* @@ -186,7 +194,7 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf, /* Search the appropriate linked list for matches. */ - if (!(matchfinder_node_valid(cur_node))) + if (!cur_node) goto out; if (best_len < 3) { @@ -200,7 +208,7 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf, /* The first 3 bytes did not match. Keep trying. */ cur_node = mf->next_tab[cur_node]; - if (!matchfinder_node_valid(cur_node) || !--depth_remaining) + if (!cur_node || !--depth_remaining) goto out; } @@ -210,7 +218,7 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf, if (best_len >= nice_len) goto out; cur_node = mf->next_tab[cur_node]; - if (!matchfinder_node_valid(cur_node) || !--depth_remaining) + if (!cur_node || !--depth_remaining) goto out; } @@ -234,7 +242,7 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf, break; cur_node = mf->next_tab[cur_node]; - if (!matchfinder_node_valid(cur_node) || !--depth_remaining) + if (!cur_node || !--depth_remaining) goto out; } @@ -251,7 +259,7 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf, goto out; } cur_node = mf->next_tab[cur_node]; - if (!matchfinder_node_valid(cur_node) || !--depth_remaining) + if (!cur_node || !--depth_remaining) goto out; } out: diff --git a/include/wimlib/matchfinder_avx2.h b/include/wimlib/matchfinder_avx2.h deleted file mode 100644 index bdf10d21..00000000 --- a/include/wimlib/matchfinder_avx2.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * matchfinder_avx2.h - * - * Matchfinding routines optimized for Intel AVX2 (Advanced Vector Extensions). - * - * Author: Eric Biggers - * Year: 2014, 2015 - * - * The author dedicates this file to the public domain. - * You can do whatever you want with this file. - */ - -#include - -static inline bool -matchfinder_init_avx2(pos_t *data, size_t size) -{ - __m256i v, *p; - size_t n; - - if (size % sizeof(__m256i) * 4) - return false; - - if (sizeof(pos_t) == 2) - v = _mm256_set1_epi16((u16)MATCHFINDER_NULL); - else if (sizeof(pos_t) == 4) - v = _mm256_set1_epi32((u32)MATCHFINDER_NULL); - else - return false; - - p = (__m256i *)data; - n = size / (sizeof(__m256i) * 4); - do { - p[0] = v; - p[1] = v; - p[2] = v; - p[3] = v; - p += 4; - } while (--n); - return true; -} diff --git a/include/wimlib/matchfinder_common.h b/include/wimlib/matchfinder_common.h deleted file mode 100644 index 2372bd66..00000000 --- a/include/wimlib/matchfinder_common.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * matchfinder_common.h - * - * Common code for Lempel-Ziv matchfinding. - * - * Author: Eric Biggers - * Year: 2014, 2015 - * - * The author dedicates this file to the public domain. - * You can do whatever you want with this file. - */ - -#ifndef _MATCHFINDER_COMMON_H -#define _MATCHFINDER_COMMON_H - -#include - -#include "wimlib/types.h" - -#ifndef MATCHFINDER_MAX_WINDOW_ORDER -# error "MATCHFINDER_MAX_WINDOW_ORDER must be defined!" -#endif - -#if MATCHFINDER_MAX_WINDOW_ORDER <= 16 -typedef u16 pos_t; -#else -typedef u32 pos_t; -#endif - -#if MATCHFINDER_MAX_WINDOW_ORDER != 16 && MATCHFINDER_MAX_WINDOW_ORDER != 32 - -/* Not all the bits of the position type are needed, so the sign bit can be - * reserved to mean "out of bounds". */ -#define MATCHFINDER_NULL ((pos_t)-1) - -static inline bool -matchfinder_node_valid(pos_t node) -{ - return !(node & ((pos_t)1 << (sizeof(pos_t) * 8 - 1))); -} - -#else - -/* All bits of the position type are needed, so use 0 to mean "out of bounds". - * This prevents the beginning of the buffer from matching anything; however, - * this doesn't matter much. */ - -#define MATCHFINDER_NULL ((pos_t)0) - -static inline bool -matchfinder_node_valid(pos_t node) -{ - return node != 0; -} - -#endif - -#define MATCHFINDER_ALIGNMENT 8 - -#ifdef __AVX2__ -# include "matchfinder_avx2.h" -# if MATCHFINDER_ALIGNMENT < 32 -# undef MATCHFINDER_ALIGNMENT -# define MATCHFINDER_ALIGNMENT 32 -# endif -#endif - -#ifdef __SSE2__ -# include "matchfinder_sse2.h" -# if MATCHFINDER_ALIGNMENT < 16 -# undef MATCHFINDER_ALIGNMENT -# define MATCHFINDER_ALIGNMENT 16 -# endif -#endif - -/* - * Representation of a match. - */ -struct lz_match { - - /* The number of bytes matched. */ - pos_t length; - - /* The offset back from the current position that was matched. */ - pos_t offset; -}; - -static inline bool -matchfinder_memset_init_okay(void) -{ - /* All bytes must match in order to use memset. */ - const pos_t v = MATCHFINDER_NULL; - if (sizeof(pos_t) == 2) - return (u8)v == (u8)(v >> 8); - if (sizeof(pos_t) == 4) - return (u8)v == (u8)(v >> 8) && - (u8)v == (u8)(v >> 16) && - (u8)v == (u8)(v >> 24); - return false; -} - -/* - * Initialize the hash table portion of the matchfinder. - * - * Essentially, this is an optimized memset(). - * - * 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary. - */ -static inline void -matchfinder_init(pos_t *data, size_t num_entries) -{ - const size_t size = num_entries * sizeof(data[0]); - -#ifdef __AVX2__ - if (matchfinder_init_avx2(data, size)) - return; -#endif - -#ifdef __SSE2__ - if (matchfinder_init_sse2(data, size)) - return; -#endif - - if (matchfinder_memset_init_okay()) { - memset(data, (u8)MATCHFINDER_NULL, size); - return; - } - - for (size_t i = 0; i < num_entries; i++) - data[i] = MATCHFINDER_NULL; -} - -#endif /* _MATCHFINDER_COMMON_H */ diff --git a/include/wimlib/matchfinder_sse2.h b/include/wimlib/matchfinder_sse2.h deleted file mode 100644 index 9b0b080b..00000000 --- a/include/wimlib/matchfinder_sse2.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * matchfinder_sse2.h - * - * Matchfinding routines optimized for Intel SSE2 (Streaming SIMD Extensions). - * - * Author: Eric Biggers - * Year: 2014, 2015 - * - * The author dedicates this file to the public domain. - * You can do whatever you want with this file. - */ - -#include - -static inline bool -matchfinder_init_sse2(pos_t *data, size_t size) -{ - __m128i v, *p; - size_t n; - - if (size % sizeof(__m128i) * 4) - return false; - - if (sizeof(pos_t) == 2) - v = _mm_set1_epi16((u16)MATCHFINDER_NULL); - else if (sizeof(pos_t) == 4) - v = _mm_set1_epi32((u32)MATCHFINDER_NULL); - else - return false; - - p = (__m128i *)data; - n = size / (sizeof(__m128i) * 4); - do { - p[0] = v; - p[1] = v; - p[2] = v; - p[3] = v; - p += 4; - } while (--n); - return true; -} diff --git a/src/lzx_compress.c b/src/lzx_compress.c index 339f27fe..5e1be485 100644 --- a/src/lzx_compress.c +++ b/src/lzx_compress.c @@ -466,8 +466,7 @@ struct lzx_compressor { LZX_MAX_MATCH_LEN - 1]; /* Hash table for finding length 2 matches */ - pos_t hash2_tab[LZX_HASH2_LENGTH] - _aligned_attribute(MATCHFINDER_ALIGNMENT); + pos_t hash2_tab[LZX_HASH2_LENGTH]; /* Binary trees matchfinder (MUST BE LAST!!!) */ struct bt_matchfinder bt_mf; @@ -1600,7 +1599,7 @@ lzx_compress_near_optimal(struct lzx_compressor *c, struct lzx_lru_queue queue; bt_matchfinder_init(&c->bt_mf); - matchfinder_init(c->hash2_tab, LZX_HASH2_LENGTH); + memset(c->hash2_tab, 0, sizeof(c->hash2_tab)); next_hash = bt_matchfinder_hash_3_bytes(in_next); lzx_lru_queue_init(&queue); @@ -1643,7 +1642,7 @@ lzx_compress_near_optimal(struct lzx_compressor *c, hash2 = lz_hash_2_bytes(in_next, LZX_HASH2_ORDER); cur_match = c->hash2_tab[hash2]; c->hash2_tab[hash2] = in_next - in_begin; - if (matchfinder_node_valid(cur_match) && + if (cur_match != 0 && (LZX_HASH2_ORDER == 16 || load_u16_unaligned(&in_begin[cur_match]) == load_u16_unaligned(in_next))) @@ -2037,9 +2036,7 @@ lzx_create_compressor(size_t max_bufsize, unsigned compression_level, if (window_order == 0) return WIMLIB_ERR_INVALID_PARAM; - c = ALIGNED_MALLOC(lzx_get_compressor_size(max_bufsize, - compression_level), - MATCHFINDER_ALIGNMENT); + c = MALLOC(lzx_get_compressor_size(max_bufsize, compression_level)); if (!c) goto oom0; @@ -2114,7 +2111,7 @@ lzx_create_compressor(size_t max_bufsize, unsigned compression_level, return 0; oom1: - ALIGNED_FREE(c); + FREE(c); oom0: return WIMLIB_ERR_NOMEM; } @@ -2163,7 +2160,7 @@ lzx_free_compressor(void *_c) if (!c->destructive) FREE(c->in_buffer); - ALIGNED_FREE(c); + FREE(c); } const struct compressor_ops lzx_compressor_ops = { diff --git a/src/xpress_compress.c b/src/xpress_compress.c index 67501604..2f3a35e9 100644 --- a/src/xpress_compress.c +++ b/src/xpress_compress.c @@ -1068,8 +1068,7 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level, if (max_bufsize > XPRESS_MAX_BUFSIZE) return WIMLIB_ERR_INVALID_PARAM; - c = ALIGNED_MALLOC(xpress_get_compressor_size(max_bufsize, compression_level), - MATCHFINDER_ALIGNMENT); + c = MALLOC(xpress_get_compressor_size(max_bufsize, compression_level)); if (!c) goto oom0; @@ -1129,7 +1128,7 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level, return 0; oom1: - ALIGNED_FREE(c); + FREE(c); oom0: return WIMLIB_ERR_NOMEM; } @@ -1164,7 +1163,7 @@ xpress_free_compressor(void *_c) } else #endif FREE(c->chosen_items); - ALIGNED_FREE(c); + FREE(c); } const struct compressor_ops xpress_compressor_ops = { -- 2.43.0