} _aligned_attribute(DECODE_TABLE_ALIGNMENT);
/* Read a Huffman-encoded symbol using the precode. */
-static inline unsigned
+static forceinline unsigned
read_presym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->precode_decode_table,
}
/* Read a Huffman-encoded symbol using the main code. */
-static inline unsigned
+static forceinline unsigned
read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->maincode_decode_table,
}
/* Read a Huffman-encoded symbol using the length code. */
-static inline unsigned
+static forceinline unsigned
read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->lencode_decode_table,
}
/* Read a Huffman-encoded symbol using the aligned offset code. */
-static inline unsigned
+static forceinline unsigned
read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->alignedcode_decode_table,
/* Decompress a block of LZX-compressed data. */
static int
-lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is,
+lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *_is,
int block_type, u32 block_size,
u8 * const out_begin, u8 *out_next, u32 recent_offsets[])
{
+ /*
+ * Redeclare the input bitstream on the stack. This shouldn't be
+ * needed, but it can improve the main loop's performance significantly
+ * with both gcc and clang, apparently because the compiler otherwise
+ * gets confused and doesn't properly allocate registers for
+ * 'is->bitbuf' et al. and/or thinks 'is->next' may point into 'is'.
+ */
+ struct input_bitstream is_onstack = *_is;
+ struct input_bitstream *is = &is_onstack;
u8 * const block_end = out_next + block_size;
unsigned min_aligned_offset_slot;
out_next += length;
} while (out_next != block_end);
+ *_is = is_onstack;
return 0;
}