X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Flzx-decompress.c;h=4c7694b6ceace040d84040d3c49064cba119fd57;hb=5b6eaafbaab217fc69946685862a09afa28b30cb;hp=8fc77844fa2125517da8ff869c7fc5ab0b7a7f94;hpb=acb16926b00fabdba062ff4300da5d3aef32ceeb;p=wimlib diff --git a/src/lzx-decompress.c b/src/lzx-decompress.c index 8fc77844..4c7694b6 100644 --- a/src/lzx-decompress.c +++ b/src/lzx-decompress.c @@ -7,20 +7,18 @@ /* * Copyright (C) 2012, 2013, 2014 Eric Biggers * - * This file is part of wimlib, a library for working with WIM files. + * This file is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) any + * later version. * - * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free - * Software Foundation; either version 3 of the License, or (at your option) - * any later version. - * - * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. See the GNU General Public License for more + * This file is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. * - * You should have received a copy of the GNU General Public License - * along with wimlib; if not, see http://www.gnu.org/licenses/. + * You should have received a copy of the GNU Lesser General Public License + * along with this file; if not, see http://www.gnu.org/licenses/. */ /* @@ -70,19 +68,21 @@ #define LZX_PRECODE_TABLEBITS 6 #define LZX_ALIGNEDCODE_TABLEBITS 7 +#define LZX_READ_LENS_MAX_OVERRUN 50 + /* Huffman decoding tables, and arrays that map symbols to codeword lengths. */ struct lzx_tables { u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) + (LZX_MAINCODE_MAX_NUM_SYMBOLS * 2)] _aligned_attribute(DECODE_TABLE_ALIGNMENT); - u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS]; + u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN]; u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) + (LZX_LENCODE_NUM_SYMBOLS * 2)] _aligned_attribute(DECODE_TABLE_ALIGNMENT); - u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS]; + u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN]; u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) + @@ -149,7 +149,8 @@ read_huffsym_using_alignedcode(struct input_bitstream *istream, * @lens: * An array that contains the length values from the previous time the * codeword lengths for this Huffman code were read, or all 0's if this is - * the first time. + * the first time. This array must have at least (@num_lens + + * LZX_READ_LENS_MAX_OVERRUN) entries. * * @num_lens: * Number of length values to decode. @@ -157,13 +158,14 @@ read_huffsym_using_alignedcode(struct input_bitstream *istream, * Returns 0 on success, or -1 if the data was invalid. */ static int -lzx_read_codeword_lens(struct input_bitstream *istream, u8 lens[], unsigned num_lens) +lzx_read_codeword_lens(struct input_bitstream *istream, u8 *lens, unsigned num_lens) { - /* Declare the decoding table and length table for the precode. */ u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) + (LZX_PRECODE_NUM_SYMBOLS * 2)] _aligned_attribute(DECODE_TABLE_ALIGNMENT); u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS]; + u8 *len_ptr = lens; + u8 *lens_end = lens + num_lens; int ret; /* Read the lengths of the precode codewords. These are given @@ -182,71 +184,58 @@ lzx_read_codeword_lens(struct input_bitstream *istream, u8 lens[], unsigned num_ if (ret) return ret; - /* Pointer past the last length value that needs to be filled in. */ - u8 *lens_end = lens + num_lens; - - for (;;) { - + /* Decode the codeword lengths. */ + do { unsigned presym; - unsigned run_len; - signed char value; - - /* Decode a symbol from the input. - * - * If the symbol is between 0 and 16, it is the difference from - * the old length, modulo 17. - * - * If the symbol is between 17 and 19, it is a special symbol - * that indicates that some number of the next lengths are all - * 0, or that some number of the next lengths are all equal to - * the next symbol. */ + u8 len; + /* Read the next precode symbol. */ presym = read_huffsym_using_precode(istream, precode_decode_table); - switch (presym) { - - case 17: /* Run of 0's */ - run_len = 4 + bitstream_read_bits(istream, 4); - do { - *lens = 0; - if (++lens == lens_end) - return 0; - } while (--run_len); - break; + if (presym < 17) { + /* Difference from old length */ + len = *len_ptr - presym; + if ((s8)len < 0) + len += 17; + *len_ptr++ = len; + } else { + /* Special RLE values */ + + unsigned run_len; + + if (presym == 17) { + /* Run of 0's */ + run_len = 4 + bitstream_read_bits(istream, 4); + len = 0; + } else if (presym == 18) { + /* Longer run of 0's */ + run_len = 20 + bitstream_read_bits(istream, 5); + len = 0; + } else { + /* Run of identical lengths */ + run_len = 4 + bitstream_read_bits(istream, 1); + presym = read_huffsym_using_precode(istream, + precode_decode_table); + len = *len_ptr - presym; + if ((s8)len < 0) + len += 17; + } - case 18: /* Longer run of 0's */ - run_len = 20 + bitstream_read_bits(istream, 5); do { - *lens = 0; - if (++lens == lens_end) - return 0; + *len_ptr++ = len; } while (--run_len); - break; - - case 19: /* Run of identical lengths */ - run_len = 4 + bitstream_read_bits(istream, 1); - presym = read_huffsym_using_precode(istream, - precode_decode_table); - value = (signed char)*lens - (signed char)presym; - if (value < 0) - value += 17; - do { - *lens = value; - if (++lens == lens_end) - return 0; - } while (--run_len); - break; - - default: /* Difference from old length */ - value = (signed char)*lens - (signed char)presym; - if (value < 0) - value += 17; - *lens = value; - if (++lens == lens_end) - return 0; - break; + /* Worst case overrun is when presym == 18, + * run_len == 20 + 31, and only 1 length was remaining. + * So LZX_READ_LENS_MAX_OVERRUN == 50. + * + * Overrun while reading the first half of maincode_lens + * can corrupt the previous values in the second half. + * This doesn't really matter because the resulting + * lengths will still be in range, and data that + * generates overruns is invalid anyway. */ } - } + } while (len_ptr < lens_end); + return 0; } /* @@ -436,9 +425,10 @@ lzx_decompress_block(int block_type, u32 block_size, u8 *window_end = window_ptr + block_size; unsigned mainsym; u32 match_len; - unsigned position_slot; + unsigned offset_slot; u32 match_offset; unsigned num_extra_bits; + unsigned ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED); while (window_ptr != window_end) { @@ -451,40 +441,42 @@ lzx_decompress_block(int block_type, u32 block_size, /* Match */ - /* Decode the length header and position slot. */ + /* Decode the length header and offset slot. */ mainsym -= LZX_NUM_CHARS; match_len = mainsym & 0x7; - position_slot = mainsym >> 3; + offset_slot = mainsym >> 3; /* If needed, read a length symbol to decode the full length. */ if (match_len == 0x7) match_len += read_huffsym_using_lencode(istream, tables); match_len += LZX_MIN_MATCH_LEN; - if (position_slot <= 2) { + if (offset_slot <= 2) { /* Repeat offset */ /* Note: This isn't a real LRU queue, since using the R2 * offset doesn't bump the R1 offset down to R2. This * quirk allows all 3 recent offsets to be handled by * the same code. (For R0, the swap is a no-op.) */ - match_offset = queue->R[position_slot]; - queue->R[position_slot] = queue->R[0]; + match_offset = queue->R[offset_slot]; + queue->R[offset_slot] = queue->R[0]; queue->R[0] = match_offset; } else { /* Explicit offset */ /* Look up the number of extra bits that need to be read - * to decode offsets with this position slot. */ - num_extra_bits = lzx_get_num_extra_bits(position_slot); + * to decode offsets with this offset slot. */ + num_extra_bits = lzx_extra_offset_bits[offset_slot]; - /* Start with the position slot base value. */ - match_offset = lzx_position_base[position_slot]; + /* Start with the offset slot base value. */ + match_offset = lzx_offset_slot_base[offset_slot]; /* In aligned offset blocks, the low-order 3 bits of * each offset are encoded using the aligned offset * code. Otherwise, all the extra bits are literal. */ - if (block_type == LZX_BLOCKTYPE_ALIGNED && num_extra_bits >= 3) { + + /*if (block_type == LZX_BLOCKTYPE_ALIGNED && num_extra_bits >= 3) {*/ + if ((num_extra_bits & ones_if_aligned) >= 3) { match_offset += bitstream_read_bits(istream, num_extra_bits - 3) << 3; match_offset += read_huffsym_using_alignedcode(istream, tables); } else { @@ -535,8 +527,8 @@ lzx_decompress(const void *compressed_data, size_t compressed_size, lzx_lru_queue_init(&queue); /* Codeword lengths begin as all 0's for delta encoding purposes. */ - memset(dec->tables.maincode_lens, 0, sizeof(dec->tables.maincode_lens)); - memset(dec->tables.lencode_lens, 0, sizeof(dec->tables.lencode_lens)); + memset(dec->tables.maincode_lens, 0, dec->num_main_syms); + memset(dec->tables.lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS); /* Set this to true if there may be 0xe8 bytes in the uncompressed data. */