win32_capture.c: Fix format string

[wimlib] / src / lzx-decompress.c
diff --git a/src/lzx-decompress.c b/src/lzx-decompress.c

index f98b630d7ecdb9b025934c60de415f33add6e747..58d1f6b1e75fdcca283a0863534462312a52e44e 100644 (file)
--- a/src/lzx-decompress.c
+++ b/src/lzx-decompress.c
@@ -70,19 +70,21 @@
  #define LZX_PRECODE_TABLEBITS          6
  #define LZX_ALIGNEDCODE_TABLEBITS      7
  
+#define LZX_READ_LENS_MAX_OVERRUN 50
+
  /* Huffman decoding tables, and arrays that map symbols to codeword lengths.  */
  struct lzx_tables {
  
         u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) +
                                         (LZX_MAINCODE_MAX_NUM_SYMBOLS * 2)]
                                         _aligned_attribute(DECODE_TABLE_ALIGNMENT);
-       u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS];
+       u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
  
  
         u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) +
                                         (LZX_LENCODE_NUM_SYMBOLS * 2)]
                                         _aligned_attribute(DECODE_TABLE_ALIGNMENT);
-       u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS];
+       u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
  
  
         u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) +
@@ -149,7 +151,8 @@ read_huffsym_using_alignedcode(struct input_bitstream *istream,
   * @lens:
   *     An array that contains the length values from the previous time the
   *     codeword lengths for this Huffman code were read, or all 0's if this is
- *     the first time.
+ *     the first time.  This array must have at least (@num_lens +
+ *     LZX_READ_LENS_MAX_OVERRUN) entries.
   *
   * @num_lens:
   *     Number of length values to decode.
@@ -157,13 +160,14 @@ read_huffsym_using_alignedcode(struct input_bitstream *istream,
   * Returns 0 on success, or -1 if the data was invalid.
   */
  static int
-lzx_read_codeword_lens(struct input_bitstream *istream, u8 lens[], unsigned num_lens)
+lzx_read_codeword_lens(struct input_bitstream *istream, u8 *lens, unsigned num_lens)
  {
-       /* Declare the decoding table and length table for the precode.  */
         u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) +
                                         (LZX_PRECODE_NUM_SYMBOLS * 2)]
                                         _aligned_attribute(DECODE_TABLE_ALIGNMENT);
         u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
+       u8 *len_ptr = lens;
+       u8 *lens_end = lens + num_lens;
         int ret;
  
         /* Read the lengths of the precode codewords.  These are given
@@ -182,71 +186,58 @@ lzx_read_codeword_lens(struct input_bitstream *istream, u8 lens[], unsigned num_
         if (ret)
                 return ret;
  
-       /* Pointer past the last length value that needs to be filled in.  */
-       u8 *lens_end = lens + num_lens;
-
-       for (;;) {
-
+       /* Decode the codeword lengths.  */
+       do {
                 unsigned presym;
-               unsigned run_len;
-               signed char value;
-
-               /* Decode a symbol from the input.
-                *
-                * If the symbol is between 0 and 16, it is the difference from
-                * the old length, modulo 17.
-                *
-                * If the symbol is between 17 and 19, it is a special symbol
-                * that indicates that some number of the next lengths are all
-                * 0, or that some number of the next lengths are all equal to
-                * the next symbol.  */
+               u8 len;
  
+               /* Read the next precode symbol.  */
                 presym = read_huffsym_using_precode(istream,
                                                     precode_decode_table);
-               switch (presym) {
-
-               case 17: /* Run of 0's  */
-                       run_len = 4 + bitstream_read_bits(istream, 4);
-                       do {
-                               *lens = 0;
-                               if (++lens == lens_end)
-                                       return 0;
-                       } while (--run_len);
-                       break;
+               if (presym < 17) {
+                       /* Difference from old length  */
+                       len = *len_ptr - presym;
+                       if ((s8)len < 0)
+                               len += 17;
+                       *len_ptr++ = len;
+               } else {
+                       /* Special RLE values  */
+
+                       unsigned run_len;
+
+                       if (presym == 17) {
+                               /* Run of 0's  */
+                               run_len = 4 + bitstream_read_bits(istream, 4);
+                               len = 0;
+                       } else if (presym == 18) {
+                               /* Longer run of 0's  */
+                               run_len = 20 + bitstream_read_bits(istream, 5);
+                               len = 0;
+                       } else {
+                               /* Run of identical lengths  */
+                               run_len = 4 + bitstream_read_bits(istream, 1);
+                               presym = read_huffsym_using_precode(istream,
+                                                                   precode_decode_table);
+                               len = *len_ptr - presym;
+                               if ((s8)len < 0)
+                                       len += 17;
+                       }
  
-               case 18: /* Longer run of 0's  */
-                       run_len = 20 + bitstream_read_bits(istream, 5);
                         do {
-                               *lens = 0;
-                               if (++lens == lens_end)
-                                       return 0;
+                               *len_ptr++ = len;
                         } while (--run_len);
-                       break;
-
-               case 19: /* Run of identical lengths  */
-                       run_len = 4 + bitstream_read_bits(istream, 1);
-                       presym = read_huffsym_using_precode(istream,
-                                                           precode_decode_table);
-                       value = (signed char)*lens - (signed char)presym;
-                       if (value < 0)
-                               value += 17;
-                       do {
-                               *lens = value;
-                               if (++lens == lens_end)
-                                       return 0;
-                       } while (--run_len);
-                       break;
-
-               default: /* Difference from old length  */
-                       value = (signed char)*lens - (signed char)presym;
-                       if (value < 0)
-                               value += 17;
-                       *lens = value;
-                       if (++lens == lens_end)
-                               return 0;
-                       break;
+                       /* Worst case overrun is when presym == 18,
+                        * run_len == 20 + 31, and only 1 length was remaining.
+                        * So LZX_READ_LENS_MAX_OVERRUN == 50.
+                        *
+                        * Overrun while reading the first half of maincode_lens
+                        * can corrupt the previous values in the second half.
+                        * This doesn't really matter because the resulting
+                        * lengths will still be in range, and data that
+                        * generates overruns is invalid anyway.  */
                 }
-       }
+       } while (len_ptr < lens_end);
+       return 0;
  }
  
  /*
@@ -383,6 +374,10 @@ lzx_read_block_header(struct input_bitstream *istream,
                 queue->R[0] = bitstream_read_u32(istream);
                 queue->R[1] = bitstream_read_u32(istream);
                 queue->R[2] = bitstream_read_u32(istream);
+
+               /* Offsets of 0 are invalid.  */
+               if (queue->R[0] == 0 || queue->R[1] == 0 || queue->R[2] == 0)
+                       return -1;
                 break;
  
         default:
@@ -395,117 +390,6 @@ lzx_read_block_header(struct input_bitstream *istream,
         return 0;
  }
  
-/*
- * Decode a match and copy its bytes into the decompression window.
- *
- * Return the length of the match in bytes, or 0 if the match underflowed the
- * window or overflowed the current block.
- */
-static u32
-lzx_decode_match(unsigned main_symbol, int block_type,
-                u32 bytes_remaining, u8 *window, u32 window_pos,
-                const struct lzx_tables *tables,
-                struct lzx_lru_queue *queue,
-                struct input_bitstream *istream)
-{
-       unsigned length_header;
-       unsigned position_slot;
-       u32 match_len;
-       u32 match_offset;
-       unsigned num_extra_bits;
-       u32 verbatim_bits;
-       u32 aligned_bits;
-
-       /* The main symbol is offset by 256 because values under 256 indicate a
-        * literal value.  */
-       main_symbol -= LZX_NUM_CHARS;
-
-       /* The length header consists of the lower 3 bits of the main element.
-        * The position slot is the rest of it. */
-       length_header = main_symbol & LZX_NUM_PRIMARY_LENS;
-       position_slot = main_symbol >> 3;
-
-       /* If the length_header is less than LZX_NUM_PRIMARY_LENS (= 7), it
-        * gives the match length as the offset from LZX_MIN_MATCH_LEN.
-        * Otherwise, the length is given by an additional symbol encoded using
-        * the length code, offset by 9 (LZX_MIN_MATCH_LEN +
-        * LZX_NUM_PRIMARY_LENS) */
-       match_len = LZX_MIN_MATCH_LEN + length_header;
-       if (length_header == LZX_NUM_PRIMARY_LENS)
-               match_len += read_huffsym_using_lencode(istream, tables);
-
-       /* If the position_slot is 0, 1, or 2, the match offset is retrieved
-        * from the LRU queue.  Otherwise, the match offset is not in the LRU
-        * queue. */
-       if (position_slot <= 2) {
-               /* Note: This isn't a real LRU queue, since using the R2 offset
-                * doesn't bump the R1 offset down to R2.  This quirk allows all
-                * 3 recent offsets to be handled by the same code.  (For R0,
-                * the swap is a no-op.)  */
-               match_offset = queue->R[position_slot];
-               queue->R[position_slot] = queue->R[0];
-               queue->R[0] = match_offset;
-       } else {
-               /* Otherwise, the offset was not encoded as one the offsets in
-                * the queue.  Depending on the position slot, there is a
-                * certain number of extra bits that need to be read to fully
-                * decode the match offset. */
-
-               /* Look up the number of extra bits that need to be read. */
-               num_extra_bits = lzx_get_num_extra_bits(position_slot);
-
-               /* For aligned blocks, if there are at least 3 extra bits, the
-                * actual number of extra bits is 3 less, and they encode a
-                * number of 8-byte words that are added to the offset; there
-                * is then an additional symbol read using the aligned offset
-                * code that specifies the actual byte alignment. */
-               if (block_type == LZX_BLOCKTYPE_ALIGNED && num_extra_bits >= 3) {
-
-                       /* There is an error in the LZX "specification" at this
-                        * point; it indicates that a Huffman symbol is to be
-                        * read only if num_extra_bits is greater than 3, but
-                        * actually it is if num_extra_bits is greater than or
-                        * equal to 3.  (Note that in the case with
-                        * num_extra_bits == 3, the assignment to verbatim_bits
-                        * will just set it to 0. ) */
-                       verbatim_bits = bitstream_read_bits(istream,
-                                                           num_extra_bits - 3);
-                       verbatim_bits <<= 3;
-                       aligned_bits = read_huffsym_using_alignedcode(istream,
-                                                                     tables);
-               } else {
-                       /* For non-aligned blocks, or for aligned blocks with
-                        * less than 3 extra bits, the extra bits are added
-                        * directly to the match offset, and the correction for
-                        * the alignment is taken to be 0. */
-                       verbatim_bits = bitstream_read_bits(istream, num_extra_bits);
-                       aligned_bits = 0;
-               }
-
-               /* Calculate the match offset. */
-               match_offset = lzx_position_base[position_slot] +
-                              verbatim_bits + aligned_bits - LZX_OFFSET_OFFSET;
-
-               /* Update the LRU queue. */
-               queue->R[2] = queue->R[1];
-               queue->R[1] = queue->R[0];
-               queue->R[0] = match_offset;
-       }
-
-       /* Validate the match, then copy it to the current position.  */
-
-       if (unlikely(match_len > bytes_remaining))
-               return 0;
-
-       if (unlikely(match_offset > window_pos))
-               return 0;
-
-       lz_copy(&window[window_pos], match_len, match_offset,
-               &window[window_pos + bytes_remaining]);
-
-       return match_len;
-}
-
  /*
   * Decompress an LZX-compressed block of data.
   *
@@ -539,30 +423,88 @@ lzx_decompress_block(int block_type, u32 block_size,
                      struct lzx_lru_queue *queue,
                      struct input_bitstream *istream)
  {
-       u32 block_end;
-       unsigned main_symbol;
+       u8 *window_ptr = &window[window_pos];
+       u8 *window_end = window_ptr + block_size;
+       unsigned mainsym;
         u32 match_len;
+       unsigned position_slot;
+       u32 match_offset;
+       unsigned num_extra_bits;
+       unsigned ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED);
+
+       while (window_ptr != window_end) {
  
-       block_end = window_pos + block_size;
-       while (window_pos < block_end) {
-               main_symbol = read_huffsym_using_maincode(istream, tables);
-               if (main_symbol < LZX_NUM_CHARS) {
+               mainsym = read_huffsym_using_maincode(istream, tables);
+               if (mainsym < LZX_NUM_CHARS) {
                         /* Literal  */
-                       window[window_pos++] = main_symbol;
+                       *window_ptr++ = mainsym;
+                       continue;
+               }
+
+               /* Match  */
+
+               /* Decode the length header and position slot.  */
+               mainsym -= LZX_NUM_CHARS;
+               match_len = mainsym & 0x7;
+               position_slot = mainsym >> 3;
+
+               /* If needed, read a length symbol to decode the full length. */
+               if (match_len == 0x7)
+                       match_len += read_huffsym_using_lencode(istream, tables);
+               match_len += LZX_MIN_MATCH_LEN;
+
+               if (position_slot <= 2) {
+                       /* Repeat offset  */
+
+                       /* Note: This isn't a real LRU queue, since using the R2
+                        * offset doesn't bump the R1 offset down to R2.  This
+                        * quirk allows all 3 recent offsets to be handled by
+                        * the same code.  (For R0, the swap is a no-op.)  */
+                       match_offset = queue->R[position_slot];
+                       queue->R[position_slot] = queue->R[0];
+                       queue->R[0] = match_offset;
                 } else {
-                       /* Match  */
-                       match_len = lzx_decode_match(main_symbol,
-                                                    block_type,
-                                                    block_end - window_pos,
-                                                    window,
-                                                    window_pos,
-                                                    tables,
-                                                    queue,
-                                                    istream);
-                       if (unlikely(match_len == 0))
-                               return -1;
-                       window_pos += match_len;
+                       /* Explicit offset  */
+
+                       /* Look up the number of extra bits that need to be read
+                        * to decode offsets with this position slot.  */
+                       num_extra_bits = lzx_get_num_extra_bits(position_slot);
+
+                       /* Start with the position slot base value.  */
+                       match_offset = lzx_position_base[position_slot];
+
+                       /* In aligned offset blocks, the low-order 3 bits of
+                        * each offset are encoded using the aligned offset
+                        * code.  Otherwise, all the extra bits are literal.  */
+
+                       /*if (block_type == LZX_BLOCKTYPE_ALIGNED && num_extra_bits >= 3) {*/
+                       if ((num_extra_bits & ones_if_aligned) >= 3) {
+                               match_offset += bitstream_read_bits(istream, num_extra_bits - 3) << 3;
+                               match_offset += read_huffsym_using_alignedcode(istream, tables);
+                       } else {
+                               match_offset += bitstream_read_bits(istream, num_extra_bits);
+                       }
+
+                       /* Adjust the offset.  */
+                       match_offset -= LZX_OFFSET_OFFSET;
+
+                       /* Update the match offset LRU queue.  */
+                       queue->R[2] = queue->R[1];
+                       queue->R[1] = queue->R[0];
+                       queue->R[0] = match_offset;
                 }
+
+               /* Validate the match, then copy it to the current position.  */
+
+               if (unlikely(match_len > window_end - window_ptr))
+                       return -1;
+
+               if (unlikely(match_offset > window_ptr - window))
+                       return -1;
+
+               lz_copy(window_ptr, match_len, match_offset, window_end);
+
+               window_ptr += match_len;
         }
         return 0;
  }
@@ -587,8 +529,8 @@ lzx_decompress(const void *compressed_data, size_t compressed_size,
         lzx_lru_queue_init(&queue);
  
         /* Codeword lengths begin as all 0's for delta encoding purposes.  */
-       memset(dec->tables.maincode_lens, 0, sizeof(dec->tables.maincode_lens));
-       memset(dec->tables.lencode_lens, 0, sizeof(dec->tables.lencode_lens));
+       memset(dec->tables.maincode_lens, 0, dec->num_main_syms);
+       memset(dec->tables.lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
  
         /* Set this to true if there may be 0xe8 bytes in the uncompressed data.
          */