u8 extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
};
+ union {
+ DECODE_TABLE_WORKING_SPACE(maincode_working_space,
+ LZX_MAINCODE_MAX_NUM_SYMBOLS,
+ LZX_MAX_MAIN_CODEWORD_LEN);
+ DECODE_TABLE_WORKING_SPACE(lencode_working_space,
+ LZX_LENCODE_NUM_SYMBOLS,
+ LZX_MAX_LEN_CODEWORD_LEN);
+ DECODE_TABLE_WORKING_SPACE(alignedcode_working_space,
+ LZX_ALIGNEDCODE_NUM_SYMBOLS,
+ LZX_MAX_ALIGNED_CODEWORD_LEN);
+ DECODE_TABLE_WORKING_SPACE(precode_working_space,
+ LZX_PRECODE_NUM_SYMBOLS,
+ LZX_MAX_PRE_CODEWORD_LEN);
+ };
+
unsigned window_order;
unsigned num_main_syms;
} _aligned_attribute(DECODE_TABLE_ALIGNMENT);
/* Read a Huffman-encoded symbol using the precode. */
-static inline unsigned
+static forceinline unsigned
read_presym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->precode_decode_table,
}
/* Read a Huffman-encoded symbol using the main code. */
-static inline unsigned
+static forceinline unsigned
read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->maincode_decode_table,
}
/* Read a Huffman-encoded symbol using the length code. */
-static inline unsigned
+static forceinline unsigned
read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->lencode_decode_table,
}
/* Read a Huffman-encoded symbol using the aligned offset code. */
-static inline unsigned
+static forceinline unsigned
read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
return read_huffsym(is, d->alignedcode_decode_table,
LZX_PRECODE_NUM_SYMBOLS,
LZX_PRECODE_TABLEBITS,
d->precode_lens,
- LZX_MAX_PRE_CODEWORD_LEN))
+ LZX_MAX_PRE_CODEWORD_LEN,
+ d->precode_working_space))
return -1;
/* Decode the codeword lengths. */
/* Decompress a block of LZX-compressed data. */
static int
-lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is,
+lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *_is,
int block_type, u32 block_size,
u8 * const out_begin, u8 *out_next, u32 recent_offsets[])
{
+ /*
+ * Redeclare the input bitstream on the stack. This shouldn't be
+ * needed, but it can improve the main loop's performance significantly
+ * with both gcc and clang, apparently because the compiler otherwise
+ * gets confused and doesn't properly allocate registers for
+ * 'is->bitbuf' et al. and/or thinks 'is->next' may point into 'is'.
+ */
+ struct input_bitstream is_onstack = *_is;
+ struct input_bitstream *is = &is_onstack;
u8 * const block_end = out_next + block_size;
unsigned min_aligned_offset_slot;
d->num_main_syms,
LZX_MAINCODE_TABLEBITS,
d->maincode_lens,
- LZX_MAX_MAIN_CODEWORD_LEN))
+ LZX_MAX_MAIN_CODEWORD_LEN,
+ d->maincode_working_space))
return -1;
if (make_huffman_decode_table(d->lencode_decode_table,
LZX_LENCODE_NUM_SYMBOLS,
LZX_LENCODE_TABLEBITS,
d->lencode_lens,
- LZX_MAX_LEN_CODEWORD_LEN))
+ LZX_MAX_LEN_CODEWORD_LEN,
+ d->lencode_working_space))
return -1;
if (block_type == LZX_BLOCKTYPE_ALIGNED) {
LZX_ALIGNEDCODE_NUM_SYMBOLS,
LZX_ALIGNEDCODE_TABLEBITS,
d->alignedcode_lens,
- LZX_MAX_ALIGNED_CODEWORD_LEN))
+ LZX_MAX_ALIGNED_CODEWORD_LEN,
+ d->alignedcode_working_space))
return -1;
min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
memcpy(d->extra_offset_bits, d->extra_offset_bits_minus_aligned,
out_next += length;
} while (out_next != block_end);
+ *_is = is_onstack;
return 0;
}