X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fdecompress_common.c;h=49cd5ba0f083afa64248246080830fd93dfbb589;hp=9743c7d0da6e9748d3e525839ce48fc3702d27d9;hb=a8343baff22193be4651a3a63b07cb1520ced4e8;hpb=67c5f2c5954d07c25f57b6d9aedb73fb3720ec0b diff --git a/src/decompress_common.c b/src/decompress_common.c index 9743c7d0..49cd5ba0 100644 --- a/src/decompress_common.c +++ b/src/decompress_common.c @@ -2,25 +2,12 @@ * decompress_common.c * * Code for decompression shared among multiple compression formats. - */ - -/* - * Copyright (C) 2012, 2013, 2014 Eric Biggers - * - * This file is part of wimlib, a library for working with WIM files. - * - * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free - * Software Foundation; either version 3 of the License, or (at your option) - * any later version. * - * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. + * Author: Eric Biggers + * Year: 2012 - 2014 * - * You should have received a copy of the GNU General Public License - * along with wimlib; if not, see http://www.gnu.org/licenses/. + * The author dedicates this file to the public domain. + * You can do whatever you want with this file. */ #ifdef HAVE_CONFIG_H @@ -28,17 +15,16 @@ #endif #include "wimlib/decompress_common.h" -#include "wimlib/error.h" -#include "wimlib/util.h" #include +#define USE_WORD_FILL + #ifdef __GNUC__ # ifdef __SSE2__ +# undef USE_WORD_FILL # define USE_SSE2_FILL # include -# else -# define USE_LONG_FILL # endif #endif @@ -156,22 +142,14 @@ make_huffman_decode_table(u16 decode_table[const restrict], unsigned stores_per_loop; unsigned decode_table_pos; -#ifdef USE_LONG_FILL - const unsigned entries_per_long = sizeof(unsigned long) / sizeof(decode_table[0]); +#ifdef USE_WORD_FILL + const unsigned entries_per_word = WORDSIZE / sizeof(decode_table[0]); #endif #ifdef USE_SSE2_FILL const unsigned entries_per_xmm = sizeof(__m128i) / sizeof(decode_table[0]); #endif - /* Check parameters if assertions are enabled. */ - wimlib_assert2((uintptr_t)decode_table % DECODE_TABLE_ALIGNMENT == 0); - wimlib_assert2(num_syms <= DECODE_TABLE_MAX_SYMBOLS); - wimlib_assert2(table_bits <= DECODE_TABLE_MAX_TABLE_BITS); - wimlib_assert2(max_codeword_len <= DECODE_TABLE_MAX_CODEWORD_LEN); - for (unsigned sym = 0; sym < num_syms; sym++) - wimlib_assert2(lens[sym] <= max_codeword_len); - /* Count how many symbols have each possible codeword length. * Note that a length of 0 indicates the corresponding symbol is not * used in the code and therefore does not have a codeword. */ @@ -192,7 +170,6 @@ make_huffman_decode_table(u16 decode_table[const restrict], if (unlikely(left < 0)) { /* The lengths overflow the codespace; that is, the code * is over-subscribed. */ - DEBUG("Invalid prefix code (over-subscribed)"); return -1; } } @@ -214,7 +191,6 @@ make_huffman_decode_table(u16 decode_table[const restrict], table_num_entries * sizeof(decode_table[0])); return 0; } - DEBUG("Invalid prefix code (incomplete set)"); return -1; } @@ -246,7 +222,7 @@ make_huffman_decode_table(u16 decode_table[const restrict], * have the most entries. From there, the number of entries per * codeword will decrease. As an optimization, we may begin filling * entries with SSE2 vector accesses (8 entries/store), then change to - * 'unsigned long' accesses (2 or 4 entries/store), then change to + * 'machine_word_t' accesses (2 or 4 entries/store), then change to * 16-bit accesses (1 entry/store). */ decode_table_ptr = decode_table; sym_idx = 0; @@ -258,11 +234,11 @@ make_huffman_decode_table(u16 decode_table[const restrict], for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) { unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; for (; sym_idx < end_sym_idx; sym_idx++) { - /* Note: unlike in the 'long' version below, the __m128i - * type already has __attribute__((may_alias)), so using - * it to access the decode table, which is an array of - * unsigned shorts, will not violate strict aliasing. - */ + /* Note: unlike in the machine_word_t version below, the + * __m128i type already has __attribute__((may_alias)), + * so using it to access the decode table, which is an + * array of unsigned shorts, will not violate strict + * aliasing. */ u16 entry; __m128i v; __m128i *p; @@ -281,42 +257,33 @@ make_huffman_decode_table(u16 decode_table[const restrict], } #endif /* USE_SSE2_FILL */ -#ifdef USE_LONG_FILL - /* Fill the entries one 'unsigned long' at a time. +#ifdef USE_WORD_FILL + /* Fill the entries one machine word at a time. * On 32-bit systems this is 2 entries per store, while on 64-bit * systems this is 4 entries per store. */ - stores_per_loop = (1 << (table_bits - codeword_len)) / entries_per_long; + stores_per_loop = (1 << (table_bits - codeword_len)) / entries_per_word; for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) { unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; for (; sym_idx < end_sym_idx; sym_idx++) { - /* Accessing the array of unsigned shorts as unsigned - * longs would violate strict aliasing and would require - * compiling the code with -fno-strict-aliasing to - * guarantee correctness. To work around this problem, - * use the gcc 'may_alias' extension to define a special - * unsigned long type that may alias any other in-memory - * variable. */ - typedef unsigned long __attribute__((may_alias)) aliased_long_t; - - unsigned long v; - aliased_long_t *p; + /* Accessing the array of u16 as u32 or u64 would + * violate strict aliasing and would require compiling + * the code with -fno-strict-aliasing to guarantee + * correctness. To work around this problem, use the + * gcc 'may_alias' extension. */ + typedef machine_word_t _may_alias_attribute aliased_word_t; + + machine_word_t v; + aliased_word_t *p; unsigned n; - BUILD_BUG_ON(sizeof(unsigned long) != 4 && - sizeof(unsigned long) != 8); + BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8); v = MAKE_DIRECT_ENTRY(sorted_syms[sym_idx], codeword_len); v |= v << 16; - if (sizeof(unsigned long) == 8) { - /* This may produce a compiler warning if an - * 'unsigned long' is 32 bits, but this won't be - * executed unless an 'unsigned long' is at - * least 64 bits anyway. */ - v |= v << 32; - } + v |= v << (WORDSIZE == 8 ? 32 : 0); - p = (aliased_long_t *)decode_table_ptr; + p = (aliased_word_t *)decode_table_ptr; n = stores_per_loop; do { @@ -325,7 +292,7 @@ make_huffman_decode_table(u16 decode_table[const restrict], decode_table_ptr = p; } } -#endif /* USE_LONG_FILL */ +#endif /* USE_WORD_FILL */ /* Fill the entries one 16-bit integer at a time. */ stores_per_loop = (1 << (table_bits - codeword_len)); @@ -350,7 +317,7 @@ make_huffman_decode_table(u16 decode_table[const restrict], } /* If we've filled in the entire table, we are done. Otherwise, - * there are codes longer than table_bits for which we must + * there are codewords longer than table_bits for which we must * generate binary trees. */ decode_table_pos = (u16*)decode_table_ptr - decode_table; @@ -359,11 +326,11 @@ make_huffman_decode_table(u16 decode_table[const restrict], unsigned next_free_tree_slot; unsigned cur_codeword; - /* First, zero out the rest of the entries. This is - * necessary so that the entries appear as "unallocated" - * in the next part. Each of these entries will - * eventually be filled the representation of the root - * node of a binary tree. */ + /* First, zero out the remaining entries. This is + * necessary so that these entries appear as + * "unallocated" in the next part. Each of these entries + * will eventually be filled with the representation of + * the root node of a binary tree. */ j = decode_table_pos; do { decode_table[j] = 0; @@ -371,7 +338,8 @@ make_huffman_decode_table(u16 decode_table[const restrict], /* We allocate child nodes starting at the end of the * direct lookup table. Note that there should be - * 2*num_syms extra entries for this purpose. */ + * 2*num_syms extra entries for this purpose, although + * fewer than this may actually be needed. */ next_free_tree_slot = table_num_entries; /* Iterate through each codeword with length greater than