* decompress_common.c
*
* Code for decompression shared among multiple compression formats.
- */
-
-/*
- * Copyright (C) 2012, 2013, 2014 Eric Biggers
- *
- * This file is part of wimlib, a library for working with WIM files.
- *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
*
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
+ * Author: Eric Biggers
+ * Year: 2012 - 2014
*
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * The author dedicates this file to the public domain.
+ * You can do whatever you want with this file.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
+#include "wimlib/assert.h"
#include "wimlib/decompress_common.h"
-#include "wimlib/error.h"
-#include "wimlib/util.h"
#include <string.h>
+#define USE_WORD_FILL
+
#ifdef __GNUC__
# ifdef __SSE2__
+# undef USE_WORD_FILL
# define USE_SSE2_FILL
# include <emmintrin.h>
-# else
-# define USE_LONG_FILL
# endif
#endif
unsigned stores_per_loop;
unsigned decode_table_pos;
-#ifdef USE_LONG_FILL
- const unsigned entries_per_long = sizeof(unsigned long) / sizeof(decode_table[0]);
+#ifdef USE_WORD_FILL
+ const unsigned entries_per_word = WORDSIZE / sizeof(decode_table[0]);
#endif
#ifdef USE_SSE2_FILL
const unsigned entries_per_xmm = sizeof(__m128i) / sizeof(decode_table[0]);
#endif
- /* Check parameters if assertions are enabled. */
- wimlib_assert2((uintptr_t)decode_table % DECODE_TABLE_ALIGNMENT == 0);
- wimlib_assert2(num_syms <= DECODE_TABLE_MAX_SYMBOLS);
- wimlib_assert2(table_bits <= DECODE_TABLE_MAX_TABLE_BITS);
- wimlib_assert2(max_codeword_len <= DECODE_TABLE_MAX_CODEWORD_LEN);
- for (unsigned sym = 0; sym < num_syms; sym++)
- wimlib_assert2(lens[sym] <= max_codeword_len);
-
/* Count how many symbols have each possible codeword length.
* Note that a length of 0 indicates the corresponding symbol is not
* used in the code and therefore does not have a codeword. */
if (unlikely(left < 0)) {
/* The lengths overflow the codespace; that is, the code
* is over-subscribed. */
- DEBUG("Invalid prefix code (over-subscribed)");
return -1;
}
}
table_num_entries * sizeof(decode_table[0]));
return 0;
}
- DEBUG("Invalid prefix code (incomplete set)");
return -1;
}
* have the most entries. From there, the number of entries per
* codeword will decrease. As an optimization, we may begin filling
* entries with SSE2 vector accesses (8 entries/store), then change to
- * 'unsigned long' accesses (2 or 4 entries/store), then change to
+ * 'machine_word_t' accesses (2 or 4 entries/store), then change to
* 16-bit accesses (1 entry/store). */
decode_table_ptr = decode_table;
sym_idx = 0;
for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) {
unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
for (; sym_idx < end_sym_idx; sym_idx++) {
- /* Note: unlike in the 'long' version below, the __m128i
- * type already has __attribute__((may_alias)), so using
- * it to access the decode table, which is an array of
- * unsigned shorts, will not violate strict aliasing.
- */
+ /* Note: unlike in the machine_word_t version below, the
+ * __m128i type already has __attribute__((may_alias)),
+ * so using it to access the decode table, which is an
+ * array of unsigned shorts, will not violate strict
+ * aliasing. */
u16 entry;
__m128i v;
__m128i *p;
}
#endif /* USE_SSE2_FILL */
-#ifdef USE_LONG_FILL
- /* Fill the entries one 'unsigned long' at a time.
+#ifdef USE_WORD_FILL
+ /* Fill the entries one machine word at a time.
* On 32-bit systems this is 2 entries per store, while on 64-bit
* systems this is 4 entries per store. */
- stores_per_loop = (1 << (table_bits - codeword_len)) / entries_per_long;
+ stores_per_loop = (1 << (table_bits - codeword_len)) / entries_per_word;
for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) {
unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
for (; sym_idx < end_sym_idx; sym_idx++) {
- /* Accessing the array of unsigned shorts as unsigned
- * longs would violate strict aliasing and would require
- * compiling the code with -fno-strict-aliasing to
- * guarantee correctness. To work around this problem,
- * use the gcc 'may_alias' extension to define a special
- * unsigned long type that may alias any other in-memory
- * variable. */
- typedef unsigned long __attribute__((may_alias)) aliased_long_t;
-
- unsigned long v;
- aliased_long_t *p;
+ /* Accessing the array of u16 as u32 or u64 would
+ * violate strict aliasing and would require compiling
+ * the code with -fno-strict-aliasing to guarantee
+ * correctness. To work around this problem, use the
+ * gcc 'may_alias' extension. */
+ typedef machine_word_t _may_alias_attribute aliased_word_t;
+
+ machine_word_t v;
+ aliased_word_t *p;
unsigned n;
- BUILD_BUG_ON(sizeof(unsigned long) != 4 &&
- sizeof(unsigned long) != 8);
+ BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
v = MAKE_DIRECT_ENTRY(sorted_syms[sym_idx], codeword_len);
v |= v << 16;
- if (sizeof(unsigned long) == 8) {
- /* This may produce a compiler warning if an
- * 'unsigned long' is 32 bits, but this won't be
- * executed unless an 'unsigned long' is at
- * least 64 bits anyway. */
- v |= v << 32;
- }
+ v |= v << (WORDSIZE == 8 ? 32 : 0);
- p = (aliased_long_t *)decode_table_ptr;
+ p = (aliased_word_t *)decode_table_ptr;
n = stores_per_loop;
do {
decode_table_ptr = p;
}
}
-#endif /* USE_LONG_FILL */
+#endif /* USE_WORD_FILL */
/* Fill the entries one 16-bit integer at a time. */
stores_per_loop = (1 << (table_bits - codeword_len));
}
/* If we've filled in the entire table, we are done. Otherwise,
- * there are codes longer than table_bits for which we must
+ * there are codewords longer than table_bits for which we must
* generate binary trees. */
decode_table_pos = (u16*)decode_table_ptr - decode_table;
unsigned next_free_tree_slot;
unsigned cur_codeword;
- /* First, zero out the rest of the entries. This is
- * necessary so that the entries appear as "unallocated"
- * in the next part. Each of these entries will
- * eventually be filled the representation of the root
- * node of a binary tree. */
+ /* First, zero out the remaining entries. This is
+ * necessary so that these entries appear as
+ * "unallocated" in the next part. Each of these entries
+ * will eventually be filled with the representation of
+ * the root node of a binary tree. */
j = decode_table_pos;
do {
decode_table[j] = 0;
/* We allocate child nodes starting at the end of the
* direct lookup table. Note that there should be
- * 2*num_syms extra entries for this purpose. */
+ * 2*num_syms extra entries for this purpose, although
+ * fewer than this may actually be needed. */
next_free_tree_slot = table_num_entries;
/* Iterate through each codeword with length greater than