/*
* Copyright (C) 2012, 2013, 2014 Eric Biggers
*
- * This file is part of wimlib, a library for working with WIM files.
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
*
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
- *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
*/
/*
#define LZX_PRECODE_TABLEBITS 6
#define LZX_ALIGNEDCODE_TABLEBITS 7
+#define LZX_READ_LENS_MAX_OVERRUN 50
+
/* Huffman decoding tables, and arrays that map symbols to codeword lengths. */
struct lzx_tables {
u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) +
(LZX_MAINCODE_MAX_NUM_SYMBOLS * 2)]
_aligned_attribute(DECODE_TABLE_ALIGNMENT);
- u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS];
+ u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) +
(LZX_LENCODE_NUM_SYMBOLS * 2)]
_aligned_attribute(DECODE_TABLE_ALIGNMENT);
- u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS];
+ u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) +
* @lens:
* An array that contains the length values from the previous time the
* codeword lengths for this Huffman code were read, or all 0's if this is
- * the first time.
+ * the first time. This array must have at least (@num_lens +
+ * LZX_READ_LENS_MAX_OVERRUN) entries.
*
* @num_lens:
* Number of length values to decode.
* Returns 0 on success, or -1 if the data was invalid.
*/
static int
-lzx_read_codeword_lens(struct input_bitstream *istream, u8 lens[], unsigned num_lens)
+lzx_read_codeword_lens(struct input_bitstream *istream, u8 *lens, unsigned num_lens)
{
- /* Declare the decoding table and length table for the precode. */
u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) +
(LZX_PRECODE_NUM_SYMBOLS * 2)]
_aligned_attribute(DECODE_TABLE_ALIGNMENT);
u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
+ u8 *len_ptr = lens;
+ u8 *lens_end = lens + num_lens;
int ret;
/* Read the lengths of the precode codewords. These are given
if (ret)
return ret;
- /* Pointer past the last length value that needs to be filled in. */
- u8 *lens_end = lens + num_lens;
-
- for (;;) {
-
+ /* Decode the codeword lengths. */
+ do {
unsigned presym;
- unsigned run_len;
- signed char value;
-
- /* Decode a symbol from the input.
- *
- * If the symbol is between 0 and 16, it is the difference from
- * the old length, modulo 17.
- *
- * If the symbol is between 17 and 19, it is a special symbol
- * that indicates that some number of the next lengths are all
- * 0, or that some number of the next lengths are all equal to
- * the next symbol. */
+ u8 len;
+ /* Read the next precode symbol. */
presym = read_huffsym_using_precode(istream,
precode_decode_table);
- switch (presym) {
-
- case 17: /* Run of 0's */
- run_len = 4 + bitstream_read_bits(istream, 4);
- do {
- *lens = 0;
- if (++lens == lens_end)
- return 0;
- } while (--run_len);
- break;
+ if (presym < 17) {
+ /* Difference from old length */
+ len = *len_ptr - presym;
+ if ((s8)len < 0)
+ len += 17;
+ *len_ptr++ = len;
+ } else {
+ /* Special RLE values */
+
+ unsigned run_len;
+
+ if (presym == 17) {
+ /* Run of 0's */
+ run_len = 4 + bitstream_read_bits(istream, 4);
+ len = 0;
+ } else if (presym == 18) {
+ /* Longer run of 0's */
+ run_len = 20 + bitstream_read_bits(istream, 5);
+ len = 0;
+ } else {
+ /* Run of identical lengths */
+ run_len = 4 + bitstream_read_bits(istream, 1);
+ presym = read_huffsym_using_precode(istream,
+ precode_decode_table);
+ len = *len_ptr - presym;
+ if ((s8)len < 0)
+ len += 17;
+ }
- case 18: /* Longer run of 0's */
- run_len = 20 + bitstream_read_bits(istream, 5);
do {
- *lens = 0;
- if (++lens == lens_end)
- return 0;
+ *len_ptr++ = len;
} while (--run_len);
- break;
-
- case 19: /* Run of identical lengths */
- run_len = 4 + bitstream_read_bits(istream, 1);
- presym = read_huffsym_using_precode(istream,
- precode_decode_table);
- value = (signed char)*lens - (signed char)presym;
- if (value < 0)
- value += 17;
- do {
- *lens = value;
- if (++lens == lens_end)
- return 0;
- } while (--run_len);
- break;
-
- default: /* Difference from old length */
- value = (signed char)*lens - (signed char)presym;
- if (value < 0)
- value += 17;
- *lens = value;
- if (++lens == lens_end)
- return 0;
- break;
+ /* Worst case overrun is when presym == 18,
+ * run_len == 20 + 31, and only 1 length was remaining.
+ * So LZX_READ_LENS_MAX_OVERRUN == 50.
+ *
+ * Overrun while reading the first half of maincode_lens
+ * can corrupt the previous values in the second half.
+ * This doesn't really matter because the resulting
+ * lengths will still be in range, and data that
+ * generates overruns is invalid anyway. */
}
- }
+ } while (len_ptr < lens_end);
+ return 0;
}
/*
u8 *window_end = window_ptr + block_size;
unsigned mainsym;
u32 match_len;
- unsigned position_slot;
+ unsigned offset_slot;
u32 match_offset;
unsigned num_extra_bits;
+ unsigned ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED);
while (window_ptr != window_end) {
/* Match */
- /* Decode the length header and position slot. */
+ /* Decode the length header and offset slot. */
mainsym -= LZX_NUM_CHARS;
match_len = mainsym & 0x7;
- position_slot = mainsym >> 3;
+ offset_slot = mainsym >> 3;
/* If needed, read a length symbol to decode the full length. */
if (match_len == 0x7)
match_len += read_huffsym_using_lencode(istream, tables);
match_len += LZX_MIN_MATCH_LEN;
- if (position_slot <= 2) {
+ if (offset_slot <= 2) {
/* Repeat offset */
/* Note: This isn't a real LRU queue, since using the R2
* offset doesn't bump the R1 offset down to R2. This
* quirk allows all 3 recent offsets to be handled by
* the same code. (For R0, the swap is a no-op.) */
- match_offset = queue->R[position_slot];
- queue->R[position_slot] = queue->R[0];
+ match_offset = queue->R[offset_slot];
+ queue->R[offset_slot] = queue->R[0];
queue->R[0] = match_offset;
} else {
/* Explicit offset */
/* Look up the number of extra bits that need to be read
- * to decode offsets with this position slot. */
- num_extra_bits = lzx_get_num_extra_bits(position_slot);
+ * to decode offsets with this offset slot. */
+ num_extra_bits = lzx_extra_offset_bits[offset_slot];
- /* Start with the position slot base value. */
- match_offset = lzx_position_base[position_slot];
+ /* Start with the offset slot base value. */
+ match_offset = lzx_offset_slot_base[offset_slot];
/* In aligned offset blocks, the low-order 3 bits of
* each offset are encoded using the aligned offset
* code. Otherwise, all the extra bits are literal. */
- if (block_type == LZX_BLOCKTYPE_ALIGNED && num_extra_bits >= 3) {
+
+ /*if (block_type == LZX_BLOCKTYPE_ALIGNED && num_extra_bits >= 3) {*/
+ if ((num_extra_bits & ones_if_aligned) >= 3) {
match_offset += bitstream_read_bits(istream, num_extra_bits - 3) << 3;
match_offset += read_huffsym_using_alignedcode(istream, tables);
} else {
if (unlikely(match_offset > window_ptr - window))
return -1;
- lz_copy(window_ptr, match_len, match_offset, window_end);
+ lz_copy(window_ptr, match_len, match_offset, window_end,
+ LZX_MIN_MATCH_LEN);
window_ptr += match_len;
}
lzx_lru_queue_init(&queue);
/* Codeword lengths begin as all 0's for delta encoding purposes. */
- memset(dec->tables.maincode_lens, 0, sizeof(dec->tables.maincode_lens));
- memset(dec->tables.lencode_lens, 0, sizeof(dec->tables.lencode_lens));
+ memset(dec->tables.maincode_lens, 0, dec->num_main_syms);
+ memset(dec->tables.lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
/* Set this to true if there may be 0xe8 bytes in the uncompressed data.
*/