X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Flzx-decompress.c;h=4c7694b6ceace040d84040d3c49064cba119fd57;hb=5b6eaafbaab217fc69946685862a09afa28b30cb;hp=8fc77844fa2125517da8ff869c7fc5ab0b7a7f94;hpb=acb16926b00fabdba062ff4300da5d3aef32ceeb;p=wimlib

diff --git a/src/lzx-decompress.c b/src/lzx-decompress.c
index 8fc77844..4c7694b6 100644
--- a/src/lzx-decompress.c
+++ b/src/lzx-decompress.c
@@ -7,20 +7,18 @@
 /*
  * Copyright (C) 2012, 2013, 2014 Eric Biggers
  *
- * This file is part of wimlib, a library for working with WIM files.
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
  *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
- *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  * details.
  *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
  */
 
 /*
@@ -70,19 +68,21 @@
 #define LZX_PRECODE_TABLEBITS		6
 #define LZX_ALIGNEDCODE_TABLEBITS	7
 
+#define LZX_READ_LENS_MAX_OVERRUN 50
+
 /* Huffman decoding tables, and arrays that map symbols to codeword lengths.  */
 struct lzx_tables {
 
 	u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) +
 					(LZX_MAINCODE_MAX_NUM_SYMBOLS * 2)]
 					_aligned_attribute(DECODE_TABLE_ALIGNMENT);
-	u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS];
+	u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
 
 
 	u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) +
 					(LZX_LENCODE_NUM_SYMBOLS * 2)]
 					_aligned_attribute(DECODE_TABLE_ALIGNMENT);
-	u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS];
+	u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
 
 
 	u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) +
@@ -149,7 +149,8 @@ read_huffsym_using_alignedcode(struct input_bitstream *istream,
  * @lens:
  *	An array that contains the length values from the previous time the
  *	codeword lengths for this Huffman code were read, or all 0's if this is
- *	the first time.
+ *	the first time.  This array must have at least (@num_lens +
+ *	LZX_READ_LENS_MAX_OVERRUN) entries.
  *
  * @num_lens:
  *	Number of length values to decode.
@@ -157,13 +158,14 @@ read_huffsym_using_alignedcode(struct input_bitstream *istream,
  * Returns 0 on success, or -1 if the data was invalid.
  */
 static int
-lzx_read_codeword_lens(struct input_bitstream *istream, u8 lens[], unsigned num_lens)
+lzx_read_codeword_lens(struct input_bitstream *istream, u8 *lens, unsigned num_lens)
 {
-	/* Declare the decoding table and length table for the precode.  */
 	u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) +
 					(LZX_PRECODE_NUM_SYMBOLS * 2)]
 					_aligned_attribute(DECODE_TABLE_ALIGNMENT);
 	u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
+	u8 *len_ptr = lens;
+	u8 *lens_end = lens + num_lens;
 	int ret;
 
 	/* Read the lengths of the precode codewords.  These are given
@@ -182,71 +184,58 @@ lzx_read_codeword_lens(struct input_bitstream *istream, u8 lens[], unsigned num_
 	if (ret)
 		return ret;
 
-	/* Pointer past the last length value that needs to be filled in.  */
-	u8 *lens_end = lens + num_lens;
-
-	for (;;) {
-
+	/* Decode the codeword lengths.  */
+	do {
 		unsigned presym;
-		unsigned run_len;
-		signed char value;
-
-		/* Decode a symbol from the input.
-		 *
-		 * If the symbol is between 0 and 16, it is the difference from
-		 * the old length, modulo 17.
-		 *
-		 * If the symbol is between 17 and 19, it is a special symbol
-		 * that indicates that some number of the next lengths are all
-		 * 0, or that some number of the next lengths are all equal to
-		 * the next symbol.  */
+		u8 len;
 
+		/* Read the next precode symbol.  */
 		presym = read_huffsym_using_precode(istream,
 						    precode_decode_table);
-		switch (presym) {
-
-		case 17: /* Run of 0's  */
-			run_len = 4 + bitstream_read_bits(istream, 4);
-			do {
-				*lens = 0;
-				if (++lens == lens_end)
-					return 0;
-			} while (--run_len);
-			break;
+		if (presym < 17) {
+			/* Difference from old length  */
+			len = *len_ptr - presym;
+			if ((s8)len < 0)
+				len += 17;
+			*len_ptr++ = len;
+		} else {
+			/* Special RLE values  */
+
+			unsigned run_len;
+
+			if (presym == 17) {
+				/* Run of 0's  */
+				run_len = 4 + bitstream_read_bits(istream, 4);
+				len = 0;
+			} else if (presym == 18) {
+				/* Longer run of 0's  */
+				run_len = 20 + bitstream_read_bits(istream, 5);
+				len = 0;
+			} else {
+				/* Run of identical lengths  */
+				run_len = 4 + bitstream_read_bits(istream, 1);
+				presym = read_huffsym_using_precode(istream,
+								    precode_decode_table);
+				len = *len_ptr - presym;
+				if ((s8)len < 0)
+					len += 17;
+			}
 
-		case 18: /* Longer run of 0's  */
-			run_len = 20 + bitstream_read_bits(istream, 5);
 			do {
-				*lens = 0;
-				if (++lens == lens_end)
-					return 0;
+				*len_ptr++ = len;
 			} while (--run_len);
-			break;
-
-		case 19: /* Run of identical lengths  */
-			run_len = 4 + bitstream_read_bits(istream, 1);
-			presym = read_huffsym_using_precode(istream,
-							    precode_decode_table);
-			value = (signed char)*lens - (signed char)presym;
-			if (value < 0)
-				value += 17;
-			do {
-				*lens = value;
-				if (++lens == lens_end)
-					return 0;
-			} while (--run_len);
-			break;
-
-		default: /* Difference from old length  */
-			value = (signed char)*lens - (signed char)presym;
-			if (value < 0)
-				value += 17;
-			*lens = value;
-			if (++lens == lens_end)
-				return 0;
-			break;
+			/* Worst case overrun is when presym == 18,
+			 * run_len == 20 + 31, and only 1 length was remaining.
+			 * So LZX_READ_LENS_MAX_OVERRUN == 50.
+			 *
+			 * Overrun while reading the first half of maincode_lens
+			 * can corrupt the previous values in the second half.
+			 * This doesn't really matter because the resulting
+			 * lengths will still be in range, and data that
+			 * generates overruns is invalid anyway.  */
 		}
-	}
+	} while (len_ptr < lens_end);
+	return 0;
 }
 
 /*
@@ -436,9 +425,10 @@ lzx_decompress_block(int block_type, u32 block_size,
 	u8 *window_end = window_ptr + block_size;
 	unsigned mainsym;
 	u32 match_len;
-	unsigned position_slot;
+	unsigned offset_slot;
 	u32 match_offset;
 	unsigned num_extra_bits;
+	unsigned ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED);
 
 	while (window_ptr != window_end) {
 
@@ -451,40 +441,42 @@ lzx_decompress_block(int block_type, u32 block_size,
 
 		/* Match  */
 
-		/* Decode the length header and position slot.  */
+		/* Decode the length header and offset slot.  */
 		mainsym -= LZX_NUM_CHARS;
 		match_len = mainsym & 0x7;
-		position_slot = mainsym >> 3;
+		offset_slot = mainsym >> 3;
 
 		/* If needed, read a length symbol to decode the full length. */
 		if (match_len == 0x7)
 			match_len += read_huffsym_using_lencode(istream, tables);
 		match_len += LZX_MIN_MATCH_LEN;
 
-		if (position_slot <= 2) {
+		if (offset_slot <= 2) {
 			/* Repeat offset  */
 
 			/* Note: This isn't a real LRU queue, since using the R2
 			 * offset doesn't bump the R1 offset down to R2.  This
 			 * quirk allows all 3 recent offsets to be handled by
 			 * the same code.  (For R0, the swap is a no-op.)  */
-			match_offset = queue->R[position_slot];
-			queue->R[position_slot] = queue->R[0];
+			match_offset = queue->R[offset_slot];
+			queue->R[offset_slot] = queue->R[0];
 			queue->R[0] = match_offset;
 		} else {
 			/* Explicit offset  */
 
 			/* Look up the number of extra bits that need to be read
-			 * to decode offsets with this position slot.  */
-			num_extra_bits = lzx_get_num_extra_bits(position_slot);
+			 * to decode offsets with this offset slot.  */
+			num_extra_bits = lzx_extra_offset_bits[offset_slot];
 
-			/* Start with the position slot base value.  */
-			match_offset = lzx_position_base[position_slot];
+			/* Start with the offset slot base value.  */
+			match_offset = lzx_offset_slot_base[offset_slot];
 
 			/* In aligned offset blocks, the low-order 3 bits of
 			 * each offset are encoded using the aligned offset
 			 * code.  Otherwise, all the extra bits are literal.  */
-			if (block_type == LZX_BLOCKTYPE_ALIGNED && num_extra_bits >= 3) {
+
+			/*if (block_type == LZX_BLOCKTYPE_ALIGNED && num_extra_bits >= 3) {*/
+			if ((num_extra_bits & ones_if_aligned) >= 3) {
 				match_offset += bitstream_read_bits(istream, num_extra_bits - 3) << 3;
 				match_offset += read_huffsym_using_alignedcode(istream, tables);
 			} else {
@@ -535,8 +527,8 @@ lzx_decompress(const void *compressed_data, size_t compressed_size,
 	lzx_lru_queue_init(&queue);
 
 	/* Codeword lengths begin as all 0's for delta encoding purposes.  */
-	memset(dec->tables.maincode_lens, 0, sizeof(dec->tables.maincode_lens));
-	memset(dec->tables.lencode_lens, 0, sizeof(dec->tables.lencode_lens));
+	memset(dec->tables.maincode_lens, 0, dec->num_main_syms);
+	memset(dec->tables.lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
 
 	/* Set this to true if there may be 0xe8 bytes in the uncompressed data.
 	 */