X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;ds=sidebyside;f=src%2Flzx_decompress.c;h=ba923fdd3aeb7e51b0238104f494ed07d7c1e64a;hb=b143910f8f18f0725adaf4af7e0bbe7ac5ab5382;hp=cce98e32a96904f850f5da6d3e10c2f11b9f39b5;hpb=4ee103c6e2a2988e1fb358bfa2dc38dcb621505a;p=wimlib diff --git a/src/lzx_decompress.c b/src/lzx_decompress.c index cce98e32..ba923fdd 100644 --- a/src/lzx_decompress.c +++ b/src/lzx_decompress.c @@ -18,7 +18,7 @@ * details. * * You should have received a copy of the GNU Lesser General Public License - * along with this file; if not, see http://www.gnu.org/licenses/. + * along with this file; if not, see https://www.gnu.org/licenses/. */ /* @@ -115,10 +115,10 @@ struct lzx_decompressor { * bits of aligned offset blocks */ u8 extra_offset_bits_minus_aligned[LZX_MAX_OFFSET_SLOTS]; -} _aligned_attribute(DECODE_TABLE_ALIGNMENT); +} __attribute__((aligned(DECODE_TABLE_ALIGNMENT))); /* Read a Huffman-encoded symbol using the precode. */ -static inline unsigned +static forceinline unsigned read_presym(const struct lzx_decompressor *d, struct input_bitstream *is) { return read_huffsym(is, d->precode_decode_table, @@ -126,7 +126,7 @@ read_presym(const struct lzx_decompressor *d, struct input_bitstream *is) } /* Read a Huffman-encoded symbol using the main code. */ -static inline unsigned +static forceinline unsigned read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is) { return read_huffsym(is, d->maincode_decode_table, @@ -134,7 +134,7 @@ read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is) } /* Read a Huffman-encoded symbol using the length code. */ -static inline unsigned +static forceinline unsigned read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is) { return read_huffsym(is, d->lencode_decode_table, @@ -142,7 +142,7 @@ read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is) } /* Read a Huffman-encoded symbol using the aligned offset code. */ -static inline unsigned +static forceinline unsigned read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is) { return read_huffsym(is, d->alignedcode_decode_table, @@ -332,10 +332,19 @@ lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is, /* Decompress a block of LZX-compressed data. */ static int -lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is, +lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *_is, int block_type, u32 block_size, u8 * const out_begin, u8 *out_next, u32 recent_offsets[]) { + /* + * Redeclare the input bitstream on the stack. This shouldn't be + * needed, but it can improve the main loop's performance significantly + * with both gcc and clang, apparently because the compiler otherwise + * gets confused and doesn't properly allocate registers for + * 'is->bitbuf' et al. and/or thinks 'is->next' may point into 'is'. + */ + struct input_bitstream is_onstack = *_is; + struct input_bitstream *is = &is_onstack; u8 * const block_end = out_next + block_size; unsigned min_aligned_offset_slot; @@ -435,6 +444,7 @@ lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is, out_next += length; } while (out_next != block_end); + *_is = is_onstack; return 0; }