X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fdecompress_common.c;h=49cd5ba0f083afa64248246080830fd93dfbb589;hp=9743c7d0da6e9748d3e525839ce48fc3702d27d9;hb=a8343baff22193be4651a3a63b07cb1520ced4e8;hpb=67c5f2c5954d07c25f57b6d9aedb73fb3720ec0b

diff --git a/src/decompress_common.c b/src/decompress_common.c
index 9743c7d0..49cd5ba0 100644
--- a/src/decompress_common.c
+++ b/src/decompress_common.c
@@ -2,25 +2,12 @@
  * decompress_common.c
  *
  * Code for decompression shared among multiple compression formats.
- */
-
-/*
- * Copyright (C) 2012, 2013, 2014 Eric Biggers
- *
- * This file is part of wimlib, a library for working with WIM files.
- *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
  *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
+ * Author:  Eric Biggers
+ * Year:    2012 - 2014
  *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * The author dedicates this file to the public domain.
+ * You can do whatever you want with this file.
  */
 
 #ifdef HAVE_CONFIG_H
@@ -28,17 +15,16 @@
 #endif
 
 #include "wimlib/decompress_common.h"
-#include "wimlib/error.h"
-#include "wimlib/util.h"
 
 #include <string.h>
 
+#define USE_WORD_FILL
+
 #ifdef __GNUC__
 #  ifdef __SSE2__
+#    undef USE_WORD_FILL
 #    define USE_SSE2_FILL
 #    include <emmintrin.h>
-#  else
-#    define USE_LONG_FILL
 #  endif
 #endif
 
@@ -156,22 +142,14 @@ make_huffman_decode_table(u16 decode_table[const restrict],
 	unsigned stores_per_loop;
 	unsigned decode_table_pos;
 
-#ifdef USE_LONG_FILL
-	const unsigned entries_per_long = sizeof(unsigned long) / sizeof(decode_table[0]);
+#ifdef USE_WORD_FILL
+	const unsigned entries_per_word = WORDSIZE / sizeof(decode_table[0]);
 #endif
 
 #ifdef USE_SSE2_FILL
 	const unsigned entries_per_xmm = sizeof(__m128i) / sizeof(decode_table[0]);
 #endif
 
-	/* Check parameters if assertions are enabled.  */
-	wimlib_assert2((uintptr_t)decode_table % DECODE_TABLE_ALIGNMENT == 0);
-	wimlib_assert2(num_syms <= DECODE_TABLE_MAX_SYMBOLS);
-	wimlib_assert2(table_bits <= DECODE_TABLE_MAX_TABLE_BITS);
-	wimlib_assert2(max_codeword_len <= DECODE_TABLE_MAX_CODEWORD_LEN);
-	for (unsigned sym = 0; sym < num_syms; sym++)
-		wimlib_assert2(lens[sym] <= max_codeword_len);
-
 	/* Count how many symbols have each possible codeword length.
 	 * Note that a length of 0 indicates the corresponding symbol is not
 	 * used in the code and therefore does not have a codeword.  */
@@ -192,7 +170,6 @@ make_huffman_decode_table(u16 decode_table[const restrict],
 		if (unlikely(left < 0)) {
 			/* The lengths overflow the codespace; that is, the code
 			 * is over-subscribed.  */
-			DEBUG("Invalid prefix code (over-subscribed)");
 			return -1;
 		}
 	}
@@ -214,7 +191,6 @@ make_huffman_decode_table(u16 decode_table[const restrict],
 			       table_num_entries * sizeof(decode_table[0]));
 			return 0;
 		}
-		DEBUG("Invalid prefix code (incomplete set)");
 		return -1;
 	}
 
@@ -246,7 +222,7 @@ make_huffman_decode_table(u16 decode_table[const restrict],
 	 * have the most entries.  From there, the number of entries per
 	 * codeword will decrease.  As an optimization, we may begin filling
 	 * entries with SSE2 vector accesses (8 entries/store), then change to
-	 * 'unsigned long' accesses (2 or 4 entries/store), then change to
+	 * 'machine_word_t' accesses (2 or 4 entries/store), then change to
 	 * 16-bit accesses (1 entry/store).  */
 	decode_table_ptr = decode_table;
 	sym_idx = 0;
@@ -258,11 +234,11 @@ make_huffman_decode_table(u16 decode_table[const restrict],
 	for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) {
 		unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
 		for (; sym_idx < end_sym_idx; sym_idx++) {
-			/* Note: unlike in the 'long' version below, the __m128i
-			 * type already has __attribute__((may_alias)), so using
-			 * it to access the decode table, which is an array of
-			 * unsigned shorts, will not violate strict aliasing.
-			 */
+			/* Note: unlike in the machine_word_t version below, the
+			 * __m128i type already has __attribute__((may_alias)),
+			 * so using it to access the decode table, which is an
+			 * array of unsigned shorts, will not violate strict
+			 * aliasing.  */
 			u16 entry;
 			__m128i v;
 			__m128i *p;
@@ -281,42 +257,33 @@ make_huffman_decode_table(u16 decode_table[const restrict],
 	}
 #endif /* USE_SSE2_FILL */
 
-#ifdef USE_LONG_FILL
-	/* Fill the entries one 'unsigned long' at a time.
+#ifdef USE_WORD_FILL
+	/* Fill the entries one machine word at a time.
 	 * On 32-bit systems this is 2 entries per store, while on 64-bit
 	 * systems this is 4 entries per store.  */
-	stores_per_loop = (1 << (table_bits - codeword_len)) / entries_per_long;
+	stores_per_loop = (1 << (table_bits - codeword_len)) / entries_per_word;
 	for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) {
 		unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
 		for (; sym_idx < end_sym_idx; sym_idx++) {
 
-			/* Accessing the array of unsigned shorts as unsigned
-			 * longs would violate strict aliasing and would require
-			 * compiling the code with -fno-strict-aliasing to
-			 * guarantee correctness.  To work around this problem,
-			 * use the gcc 'may_alias' extension to define a special
-			 * unsigned long type that may alias any other in-memory
-			 * variable.  */
-			typedef unsigned long __attribute__((may_alias)) aliased_long_t;
-
-			unsigned long v;
-			aliased_long_t *p;
+			/* Accessing the array of u16 as u32 or u64 would
+			 * violate strict aliasing and would require compiling
+			 * the code with -fno-strict-aliasing to guarantee
+			 * correctness.  To work around this problem, use the
+			 * gcc 'may_alias' extension.  */
+			typedef machine_word_t _may_alias_attribute aliased_word_t;
+
+			machine_word_t v;
+			aliased_word_t *p;
 			unsigned n;
 
-			BUILD_BUG_ON(sizeof(unsigned long) != 4 &&
-				     sizeof(unsigned long) != 8);
+			BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
 
 			v = MAKE_DIRECT_ENTRY(sorted_syms[sym_idx], codeword_len);
 			v |= v << 16;
-			if (sizeof(unsigned long) == 8) {
-				/* This may produce a compiler warning if an
-				 * 'unsigned long' is 32 bits, but this won't be
-				 * executed unless an 'unsigned long' is at
-				 * least 64 bits anyway.  */
-				v |= v << 32;
-			}
+			v |= v << (WORDSIZE == 8 ? 32 : 0);
 
-			p = (aliased_long_t *)decode_table_ptr;
+			p = (aliased_word_t *)decode_table_ptr;
 			n = stores_per_loop;
 
 			do {
@@ -325,7 +292,7 @@ make_huffman_decode_table(u16 decode_table[const restrict],
 			decode_table_ptr = p;
 		}
 	}
-#endif /* USE_LONG_FILL */
+#endif /* USE_WORD_FILL */
 
 	/* Fill the entries one 16-bit integer at a time.  */
 	stores_per_loop = (1 << (table_bits - codeword_len));
@@ -350,7 +317,7 @@ make_huffman_decode_table(u16 decode_table[const restrict],
 	}
 
 	/* If we've filled in the entire table, we are done.  Otherwise,
-	 * there are codes longer than table_bits for which we must
+	 * there are codewords longer than table_bits for which we must
 	 * generate binary trees.  */
 
 	decode_table_pos = (u16*)decode_table_ptr - decode_table;
@@ -359,11 +326,11 @@ make_huffman_decode_table(u16 decode_table[const restrict],
 		unsigned next_free_tree_slot;
 		unsigned cur_codeword;
 
-		/* First, zero out the rest of the entries.  This is
-		 * necessary so that the entries appear as "unallocated"
-		 * in the next part.  Each of these entries will
-		 * eventually be filled the representation of the root
-		 * node of a binary tree.  */
+		/* First, zero out the remaining entries.  This is
+		 * necessary so that these entries appear as
+		 * "unallocated" in the next part.  Each of these entries
+		 * will eventually be filled with the representation of
+		 * the root node of a binary tree.  */
 		j = decode_table_pos;
 		do {
 			decode_table[j] = 0;
@@ -371,7 +338,8 @@ make_huffman_decode_table(u16 decode_table[const restrict],
 
 		/* We allocate child nodes starting at the end of the
 		 * direct lookup table.  Note that there should be
-		 * 2*num_syms extra entries for this purpose.  */
+		 * 2*num_syms extra entries for this purpose, although
+		 * fewer than this may actually be needed.  */
 		next_free_tree_slot = table_num_entries;
 
 		/* Iterate through each codeword with length greater than