From acb16926b00fabdba062ff4300da5d3aef32ceeb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Sep 2014 16:40:56 -0500 Subject: [PATCH] lzx-decompress.c: Inline and optimize lzx_decode_match() --- src/lzx-decompress.c | 204 ++++++++++++++++--------------------------- 1 file changed, 74 insertions(+), 130 deletions(-) diff --git a/src/lzx-decompress.c b/src/lzx-decompress.c index 513944ee..8fc77844 100644 --- a/src/lzx-decompress.c +++ b/src/lzx-decompress.c @@ -399,117 +399,6 @@ lzx_read_block_header(struct input_bitstream *istream, return 0; } -/* - * Decode a match and copy its bytes into the decompression window. - * - * Return the length of the match in bytes, or 0 if the match underflowed the - * window or overflowed the current block. - */ -static u32 -lzx_decode_match(unsigned main_symbol, int block_type, - u32 bytes_remaining, u8 *window, u32 window_pos, - const struct lzx_tables *tables, - struct lzx_lru_queue *queue, - struct input_bitstream *istream) -{ - unsigned length_header; - unsigned position_slot; - u32 match_len; - u32 match_offset; - unsigned num_extra_bits; - u32 verbatim_bits; - u32 aligned_bits; - - /* The main symbol is offset by 256 because values under 256 indicate a - * literal value. */ - main_symbol -= LZX_NUM_CHARS; - - /* The length header consists of the lower 3 bits of the main element. - * The position slot is the rest of it. */ - length_header = main_symbol & LZX_NUM_PRIMARY_LENS; - position_slot = main_symbol >> 3; - - /* If the length_header is less than LZX_NUM_PRIMARY_LENS (= 7), it - * gives the match length as the offset from LZX_MIN_MATCH_LEN. - * Otherwise, the length is given by an additional symbol encoded using - * the length code, offset by 9 (LZX_MIN_MATCH_LEN + - * LZX_NUM_PRIMARY_LENS) */ - match_len = LZX_MIN_MATCH_LEN + length_header; - if (length_header == LZX_NUM_PRIMARY_LENS) - match_len += read_huffsym_using_lencode(istream, tables); - - /* If the position_slot is 0, 1, or 2, the match offset is retrieved - * from the LRU queue. Otherwise, the match offset is not in the LRU - * queue. */ - if (position_slot <= 2) { - /* Note: This isn't a real LRU queue, since using the R2 offset - * doesn't bump the R1 offset down to R2. This quirk allows all - * 3 recent offsets to be handled by the same code. (For R0, - * the swap is a no-op.) */ - match_offset = queue->R[position_slot]; - queue->R[position_slot] = queue->R[0]; - queue->R[0] = match_offset; - } else { - /* Otherwise, the offset was not encoded as one the offsets in - * the queue. Depending on the position slot, there is a - * certain number of extra bits that need to be read to fully - * decode the match offset. */ - - /* Look up the number of extra bits that need to be read. */ - num_extra_bits = lzx_get_num_extra_bits(position_slot); - - /* For aligned blocks, if there are at least 3 extra bits, the - * actual number of extra bits is 3 less, and they encode a - * number of 8-byte words that are added to the offset; there - * is then an additional symbol read using the aligned offset - * code that specifies the actual byte alignment. */ - if (block_type == LZX_BLOCKTYPE_ALIGNED && num_extra_bits >= 3) { - - /* There is an error in the LZX "specification" at this - * point; it indicates that a Huffman symbol is to be - * read only if num_extra_bits is greater than 3, but - * actually it is if num_extra_bits is greater than or - * equal to 3. (Note that in the case with - * num_extra_bits == 3, the assignment to verbatim_bits - * will just set it to 0. ) */ - verbatim_bits = bitstream_read_bits(istream, - num_extra_bits - 3); - verbatim_bits <<= 3; - aligned_bits = read_huffsym_using_alignedcode(istream, - tables); - } else { - /* For non-aligned blocks, or for aligned blocks with - * less than 3 extra bits, the extra bits are added - * directly to the match offset, and the correction for - * the alignment is taken to be 0. */ - verbatim_bits = bitstream_read_bits(istream, num_extra_bits); - aligned_bits = 0; - } - - /* Calculate the match offset. */ - match_offset = lzx_position_base[position_slot] + - verbatim_bits + aligned_bits - LZX_OFFSET_OFFSET; - - /* Update the LRU queue. */ - queue->R[2] = queue->R[1]; - queue->R[1] = queue->R[0]; - queue->R[0] = match_offset; - } - - /* Validate the match, then copy it to the current position. */ - - if (unlikely(match_len > bytes_remaining)) - return 0; - - if (unlikely(match_offset > window_pos)) - return 0; - - lz_copy(&window[window_pos], match_len, match_offset, - &window[window_pos + bytes_remaining]); - - return match_len; -} - /* * Decompress an LZX-compressed block of data. * @@ -543,30 +432,85 @@ lzx_decompress_block(int block_type, u32 block_size, struct lzx_lru_queue *queue, struct input_bitstream *istream) { - u32 block_end; - unsigned main_symbol; + u8 *window_ptr = &window[window_pos]; + u8 *window_end = window_ptr + block_size; + unsigned mainsym; u32 match_len; + unsigned position_slot; + u32 match_offset; + unsigned num_extra_bits; + + while (window_ptr != window_end) { - block_end = window_pos + block_size; - while (window_pos < block_end) { - main_symbol = read_huffsym_using_maincode(istream, tables); - if (main_symbol < LZX_NUM_CHARS) { + mainsym = read_huffsym_using_maincode(istream, tables); + if (mainsym < LZX_NUM_CHARS) { /* Literal */ - window[window_pos++] = main_symbol; + *window_ptr++ = mainsym; + continue; + } + + /* Match */ + + /* Decode the length header and position slot. */ + mainsym -= LZX_NUM_CHARS; + match_len = mainsym & 0x7; + position_slot = mainsym >> 3; + + /* If needed, read a length symbol to decode the full length. */ + if (match_len == 0x7) + match_len += read_huffsym_using_lencode(istream, tables); + match_len += LZX_MIN_MATCH_LEN; + + if (position_slot <= 2) { + /* Repeat offset */ + + /* Note: This isn't a real LRU queue, since using the R2 + * offset doesn't bump the R1 offset down to R2. This + * quirk allows all 3 recent offsets to be handled by + * the same code. (For R0, the swap is a no-op.) */ + match_offset = queue->R[position_slot]; + queue->R[position_slot] = queue->R[0]; + queue->R[0] = match_offset; } else { - /* Match */ - match_len = lzx_decode_match(main_symbol, - block_type, - block_end - window_pos, - window, - window_pos, - tables, - queue, - istream); - if (unlikely(match_len == 0)) - return -1; - window_pos += match_len; + /* Explicit offset */ + + /* Look up the number of extra bits that need to be read + * to decode offsets with this position slot. */ + num_extra_bits = lzx_get_num_extra_bits(position_slot); + + /* Start with the position slot base value. */ + match_offset = lzx_position_base[position_slot]; + + /* In aligned offset blocks, the low-order 3 bits of + * each offset are encoded using the aligned offset + * code. Otherwise, all the extra bits are literal. */ + if (block_type == LZX_BLOCKTYPE_ALIGNED && num_extra_bits >= 3) { + match_offset += bitstream_read_bits(istream, num_extra_bits - 3) << 3; + match_offset += read_huffsym_using_alignedcode(istream, tables); + } else { + match_offset += bitstream_read_bits(istream, num_extra_bits); + } + + /* Adjust the offset. */ + match_offset -= LZX_OFFSET_OFFSET; + + /* Update the match offset LRU queue. */ + queue->R[2] = queue->R[1]; + queue->R[1] = queue->R[0]; + queue->R[0] = match_offset; } + + /* Validate the match, then copy it to the current position. */ + + if (unlikely(match_len > window_end - window_ptr)) + return -1; + + if (unlikely(match_offset > window_ptr - window)) + return -1; + + lz_copy(window_ptr, match_len, match_offset, window_end); + + window_ptr += match_len; } return 0; } -- 2.43.0