Fix various typos

[wimlib] / src / decompress_common.c
diff --git a/src/decompress_common.c b/src/decompress_common.c

index 2c3da1611324a00c4250794145a923688d7a595c..76fb1278ab0efed9cdd2ca8cffe2bd5ecd10d8b3 100644 (file)
--- a/src/decompress_common.c
+++ b/src/decompress_common.c
@@ -2,25 +2,12 @@
   * decompress_common.c
   *
   * Code for decompression shared among multiple compression formats.
- */
-
-/*
- * Copyright (C) 2012, 2013, 2014 Eric Biggers
- *
- * This file is part of wimlib, a library for working with WIM files.
- *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
   *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
+ * Author:  Eric Biggers
+ * Year:    2012 - 2014
   *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * The author dedicates this file to the public domain.
+ * You can do whatever you want with this file.
   */
  
  #ifdef HAVE_CONFIG_H
@@ -28,16 +15,16 @@
  #endif
  
  #include "wimlib/decompress_common.h"
-#include "wimlib/error.h"
  
  #include <string.h>
  
+#define USE_WORD_FILL
+
  #ifdef __GNUC__
  #  ifdef __SSE2__
+#    undef USE_WORD_FILL
  #    define USE_SSE2_FILL
  #    include <emmintrin.h>
-#  else
-#    define USE_LONG_FILL
  #  endif
  #endif
  
@@ -111,9 +98,11 @@
   *   binary tree.
   *
   * @decode_table:
- *     The array in which to create the decoding table.
- *     This must be 16-byte aligned and must have a length of at least
- *     ((2**table_bits) + 2 * num_syms) entries.
+ *     The array in which to create the decoding table.  This must be
+ *     16-byte aligned and must have a length of at least
+ *     ((2**table_bits) + 2 * num_syms) entries.  This is permitted to
+ *     alias @lens, since all information from @lens is consumed before
+*      anything is written to @decode_table.
   *
   * @num_syms:
   *     The number of symbols in the alphabet; also, the length of the
@@ -128,7 +117,9 @@
   *     An array of length @num_syms, indexable by symbol, that gives the
   *     length of the codeword, in bits, for that symbol.  The length can
   *     be 0, which means that the symbol does not have a codeword
- *     assigned.
+ *     assigned.  This is permitted to alias @decode_table, since all
+ *     information from @lens is consumed before anything is written to
+ *     @decode_table.
   *
   * @max_codeword_len:
   *     The longest codeword length allowed in the compression format.
@@ -139,10 +130,10 @@
   * code.
   */
  int
-make_huffman_decode_table(u16 decode_table[const restrict],
+make_huffman_decode_table(u16 decode_table[const],
                           const unsigned num_syms,
                           const unsigned table_bits,
-                         const u8 lens[const restrict],
+                         const u8 lens[const],
                           const unsigned max_codeword_len)
  {
         const unsigned table_num_entries = 1 << table_bits;
@@ -155,22 +146,14 @@ make_huffman_decode_table(u16 decode_table[const restrict],
         unsigned stores_per_loop;
         unsigned decode_table_pos;
  
-#ifdef USE_LONG_FILL
-       const unsigned entries_per_long = sizeof(unsigned long) / sizeof(decode_table[0]);
+#ifdef USE_WORD_FILL
+       const unsigned entries_per_word = WORDSIZE / sizeof(decode_table[0]);
  #endif
  
  #ifdef USE_SSE2_FILL
         const unsigned entries_per_xmm = sizeof(__m128i) / sizeof(decode_table[0]);
  #endif
  
-       /* Check parameters if assertions are enabled.  */
-       wimlib_assert2((uintptr_t)decode_table % DECODE_TABLE_ALIGNMENT == 0);
-       wimlib_assert2(num_syms <= DECODE_TABLE_MAX_SYMBOLS);
-       wimlib_assert2(table_bits <= DECODE_TABLE_MAX_TABLE_BITS);
-       wimlib_assert2(max_codeword_len <= DECODE_TABLE_MAX_CODEWORD_LEN);
-       for (unsigned sym = 0; sym < num_syms; sym++)
-               wimlib_assert2(lens[sym] <= max_codeword_len);
-
         /* Count how many symbols have each possible codeword length.
          * Note that a length of 0 indicates the corresponding symbol is not
          * used in the code and therefore does not have a codeword.  */
@@ -191,7 +174,6 @@ make_huffman_decode_table(u16 decode_table[const restrict],
                 if (unlikely(left < 0)) {
                         /* The lengths overflow the codespace; that is, the code
                          * is over-subscribed.  */
-                       DEBUG("Invalid prefix code (over-subscribed)");
                         return -1;
                 }
         }
@@ -213,7 +195,6 @@ make_huffman_decode_table(u16 decode_table[const restrict],
                                table_num_entries * sizeof(decode_table[0]));
                         return 0;
                 }
-               DEBUG("Invalid prefix code (incomplete set)");
                 return -1;
         }
  
@@ -245,7 +226,7 @@ make_huffman_decode_table(u16 decode_table[const restrict],
          * have the most entries.  From there, the number of entries per
          * codeword will decrease.  As an optimization, we may begin filling
          * entries with SSE2 vector accesses (8 entries/store), then change to
-        * 'unsigned long' accesses (2 or 4 entries/store), then change to
+        * 'machine_word_t' accesses (2 or 4 entries/store), then change to
          * 16-bit accesses (1 entry/store).  */
         decode_table_ptr = decode_table;
         sym_idx = 0;
@@ -257,11 +238,11 @@ make_huffman_decode_table(u16 decode_table[const restrict],
         for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) {
                 unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
                 for (; sym_idx < end_sym_idx; sym_idx++) {
-                       /* Note: unlike in the 'long' version below, the __m128i
-                        * type already has __attribute__((may_alias)), so using
-                        * it to access the decode table, which is an array of
-                        * unsigned shorts, will not violate strict aliasing.
-                        */
+                       /* Note: unlike in the machine_word_t version below, the
+                        * __m128i type already has __attribute__((may_alias)),
+                        * so using it to access the decode table, which is an
+                        * array of unsigned shorts, will not violate strict
+                        * aliasing.  */
                         u16 entry;
                         __m128i v;
                         __m128i *p;
@@ -280,42 +261,33 @@ make_huffman_decode_table(u16 decode_table[const restrict],
         }
  #endif /* USE_SSE2_FILL */
  
-#ifdef USE_LONG_FILL
-       /* Fill the entries one 'unsigned long' at a time.
+#ifdef USE_WORD_FILL
+       /* Fill the entries one machine word at a time.
          * On 32-bit systems this is 2 entries per store, while on 64-bit
          * systems this is 4 entries per store.  */
-       stores_per_loop = (1 << (table_bits - codeword_len)) / entries_per_long;
+       stores_per_loop = (1 << (table_bits - codeword_len)) / entries_per_word;
         for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) {
                 unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
                 for (; sym_idx < end_sym_idx; sym_idx++) {
  
-                       /* Accessing the array of unsigned shorts as unsigned
-                        * longs would violate strict aliasing and would require
-                        * compiling the code with -fno-strict-aliasing to
-                        * guarantee correctness.  To work around this problem,
-                        * use the gcc 'may_alias' extension to define a special
-                        * unsigned long type that may alias any other in-memory
-                        * variable.  */
-                       typedef unsigned long __attribute__((may_alias)) aliased_long_t;
-
-                       unsigned long v;
-                       aliased_long_t *p;
+                       /* Accessing the array of u16 as u32 or u64 would
+                        * violate strict aliasing and would require compiling
+                        * the code with -fno-strict-aliasing to guarantee
+                        * correctness.  To work around this problem, use the
+                        * gcc 'may_alias' extension.  */
+                       typedef machine_word_t _may_alias_attribute aliased_word_t;
+
+                       machine_word_t v;
+                       aliased_word_t *p;
                         unsigned n;
  
-                       BUILD_BUG_ON(sizeof(unsigned long) != 4 &&
-                                    sizeof(unsigned long) != 8);
+                       STATIC_ASSERT(WORDSIZE == 4 || WORDSIZE == 8);
  
                         v = MAKE_DIRECT_ENTRY(sorted_syms[sym_idx], codeword_len);
                         v |= v << 16;
-                       if (sizeof(unsigned long) == 8) {
-                               /* This may produce a compiler warning if an
-                                * 'unsigned long' is 32 bits, but this won't be
-                                * executed unless an 'unsigned long' is at
-                                * least 64 bits anyway.  */
-                               v |= v << 32;
-                       }
+                       v |= v << (WORDSIZE == 8 ? 32 : 0);
  
-                       p = (aliased_long_t *)decode_table_ptr;
+                       p = (aliased_word_t *)decode_table_ptr;
                         n = stores_per_loop;
  
                         do {
@@ -324,7 +296,7 @@ make_huffman_decode_table(u16 decode_table[const restrict],
                         decode_table_ptr = p;
                 }
         }
-#endif /* USE_LONG_FILL */
+#endif /* USE_WORD_FILL */
  
         /* Fill the entries one 16-bit integer at a time.  */
         stores_per_loop = (1 << (table_bits - codeword_len));
@@ -405,7 +377,7 @@ make_huffman_decode_table(u16 decode_table[const restrict],
                                  * of as simply the root of the tree.  The
                                  * representation of these internal nodes is
                                  * simply the index of the left child combined
-                                * with the special bits 0xC000 to distingush
+                                * with the special bits 0xC000 to distinguish
                                  * the entry from direct mapping and leaf node
                                  * entries.  */
                                 do {