X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Flzx_decompress.c;h=5747d327270717ae6892c257374088e4dfec789d;hb=38bd45bb7e08f2072e256afd5bcc21ceb0d97b8e;hp=9f93fcf781441cc815e155dda41b6a339910c879;hpb=908381d2809a48acd9490ec080e51087ae1529fd;p=wimlib diff --git a/src/lzx_decompress.c b/src/lzx_decompress.c index 9f93fcf7..5747d327 100644 --- a/src/lzx_decompress.c +++ b/src/lzx_decompress.c @@ -93,6 +93,21 @@ struct lzx_decompressor { u8 extra_offset_bits[LZX_MAX_OFFSET_SLOTS]; }; + union { + DECODE_TABLE_WORKING_SPACE(maincode_working_space, + LZX_MAINCODE_MAX_NUM_SYMBOLS, + LZX_MAX_MAIN_CODEWORD_LEN); + DECODE_TABLE_WORKING_SPACE(lencode_working_space, + LZX_LENCODE_NUM_SYMBOLS, + LZX_MAX_LEN_CODEWORD_LEN); + DECODE_TABLE_WORKING_SPACE(alignedcode_working_space, + LZX_ALIGNEDCODE_NUM_SYMBOLS, + LZX_MAX_ALIGNED_CODEWORD_LEN); + DECODE_TABLE_WORKING_SPACE(precode_working_space, + LZX_PRECODE_NUM_SYMBOLS, + LZX_MAX_PRE_CODEWORD_LEN); + }; + unsigned window_order; unsigned num_main_syms; @@ -103,7 +118,7 @@ struct lzx_decompressor { } _aligned_attribute(DECODE_TABLE_ALIGNMENT); /* Read a Huffman-encoded symbol using the precode. */ -static inline unsigned +static forceinline unsigned read_presym(const struct lzx_decompressor *d, struct input_bitstream *is) { return read_huffsym(is, d->precode_decode_table, @@ -111,7 +126,7 @@ read_presym(const struct lzx_decompressor *d, struct input_bitstream *is) } /* Read a Huffman-encoded symbol using the main code. */ -static inline unsigned +static forceinline unsigned read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is) { return read_huffsym(is, d->maincode_decode_table, @@ -119,7 +134,7 @@ read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is) } /* Read a Huffman-encoded symbol using the length code. */ -static inline unsigned +static forceinline unsigned read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is) { return read_huffsym(is, d->lencode_decode_table, @@ -127,7 +142,7 @@ read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is) } /* Read a Huffman-encoded symbol using the aligned offset code. */ -static inline unsigned +static forceinline unsigned read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is) { return read_huffsym(is, d->alignedcode_decode_table, @@ -157,7 +172,8 @@ lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is, LZX_PRECODE_NUM_SYMBOLS, LZX_PRECODE_TABLEBITS, d->precode_lens, - LZX_MAX_PRE_CODEWORD_LEN)) + LZX_MAX_PRE_CODEWORD_LEN, + d->precode_working_space)) return -1; /* Decode the codeword lengths. */ @@ -316,10 +332,19 @@ lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is, /* Decompress a block of LZX-compressed data. */ static int -lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is, +lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *_is, int block_type, u32 block_size, u8 * const out_begin, u8 *out_next, u32 recent_offsets[]) { + /* + * Redeclare the input bitstream on the stack. This shouldn't be + * needed, but it can improve the main loop's performance significantly + * with both gcc and clang, apparently because the compiler otherwise + * gets confused and doesn't properly allocate registers for + * 'is->bitbuf' et al. and/or thinks 'is->next' may point into 'is'. + */ + struct input_bitstream is_onstack = *_is; + struct input_bitstream *is = &is_onstack; u8 * const block_end = out_next + block_size; unsigned min_aligned_offset_slot; @@ -333,14 +358,16 @@ lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is, d->num_main_syms, LZX_MAINCODE_TABLEBITS, d->maincode_lens, - LZX_MAX_MAIN_CODEWORD_LEN)) + LZX_MAX_MAIN_CODEWORD_LEN, + d->maincode_working_space)) return -1; if (make_huffman_decode_table(d->lencode_decode_table, LZX_LENCODE_NUM_SYMBOLS, LZX_LENCODE_TABLEBITS, d->lencode_lens, - LZX_MAX_LEN_CODEWORD_LEN)) + LZX_MAX_LEN_CODEWORD_LEN, + d->lencode_working_space)) return -1; if (block_type == LZX_BLOCKTYPE_ALIGNED) { @@ -348,7 +375,8 @@ lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is, LZX_ALIGNEDCODE_NUM_SYMBOLS, LZX_ALIGNEDCODE_TABLEBITS, d->alignedcode_lens, - LZX_MAX_ALIGNED_CODEWORD_LEN)) + LZX_MAX_ALIGNED_CODEWORD_LEN, + d->alignedcode_working_space)) return -1; min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT; memcpy(d->extra_offset_bits, d->extra_offset_bits_minus_aligned, @@ -409,20 +437,14 @@ lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is, } recent_offsets[0] = offset; - /* Validate the match, then copy it to the current position. */ - - if (unlikely(length > block_end - out_next)) + /* Validate the match and copy it to the current position. */ + if (unlikely(lz_copy(length, offset, out_begin, + out_next, block_end, LZX_MIN_MATCH_LEN))) return -1; - - if (unlikely(offset > out_next - out_begin)) - return -1; - - lz_copy(out_next, length, offset, block_end, LZX_MIN_MATCH_LEN); - out_next += length; - } while (out_next != block_end); + *_is = is_onstack; return 0; }