X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Flzms_decompress.c;h=bafe85ee35f5786eaed25fa5194aea1f649ab7a4;hb=151463861a212a1ae7deae7e9844cd0cd1c4343c;hp=bd7e76e12e809dcbd222baf08557ebce51990717;hpb=9bb97aee4a44801c1d755916aa7a132b2cdec80f;p=wimlib diff --git a/src/lzms_decompress.c b/src/lzms_decompress.c index bd7e76e1..bafe85ee 100644 --- a/src/lzms_decompress.c +++ b/src/lzms_decompress.c @@ -301,13 +301,14 @@ struct lzms_input_bitstream { const le16 *begin; }; +#define BITBUF_NBITS (8 * sizeof(bitbuf_t)) + /* Bookkeeping information for an adaptive Huffman code */ struct lzms_huffman_rebuild_info { unsigned num_syms_until_rebuild; unsigned num_syms; unsigned rebuild_freq; u32 *codewords; - u8 *lens; u32 *freqs; u16 *decode_table; unsigned table_bits; @@ -334,22 +335,18 @@ struct lzms_decompressor { /* States and probability entries for item type disambiguation */ u32 main_state; - struct lzms_probability_entry main_probs[LZMS_NUM_MAIN_PROBS]; - u32 match_state; - struct lzms_probability_entry match_probs[LZMS_NUM_MATCH_PROBS]; - u32 lz_state; - struct lzms_probability_entry lz_probs[LZMS_NUM_LZ_PROBS]; - u32 delta_state; - struct lzms_probability_entry delta_probs[LZMS_NUM_DELTA_PROBS]; - u32 lz_rep_states[LZMS_NUM_LZ_REP_DECISIONS]; + u32 delta_rep_states[LZMS_NUM_DELTA_REP_DECISIONS]; + + struct lzms_probability_entry main_probs[LZMS_NUM_MAIN_PROBS]; + struct lzms_probability_entry match_probs[LZMS_NUM_MATCH_PROBS]; + struct lzms_probability_entry lz_probs[LZMS_NUM_LZ_PROBS]; + struct lzms_probability_entry delta_probs[LZMS_NUM_DELTA_PROBS]; struct lzms_probability_entry lz_rep_probs[LZMS_NUM_LZ_REP_DECISIONS] [LZMS_NUM_LZ_REP_PROBS]; - - u32 delta_rep_states[LZMS_NUM_DELTA_REP_DECISIONS]; struct lzms_probability_entry delta_rep_probs[LZMS_NUM_DELTA_REP_DECISIONS] [LZMS_NUM_DELTA_REP_PROBS]; @@ -386,7 +383,6 @@ struct lzms_decompressor { struct lzms_huffman_rebuild_info delta_power_rebuild_info; u32 codewords[LZMS_MAX_NUM_SYMS]; - u8 lens[LZMS_MAX_NUM_SYMS]; }; // struct @@ -412,25 +408,35 @@ lzms_input_bitstream_init(struct lzms_input_bitstream *is, static inline void lzms_ensure_bits(struct lzms_input_bitstream *is, unsigned num_bits) { + unsigned avail; + if (is->bitsleft >= num_bits) return; - if (likely(is->next != is->begin)) - is->bitbuf |= (bitbuf_t)le16_to_cpu(*--is->next) - << (sizeof(is->bitbuf) * 8 - is->bitsleft - 16); - is->bitsleft += 16; + avail = BITBUF_NBITS - is->bitsleft; - if (likely(is->next != is->begin)) - is->bitbuf |= (bitbuf_t)le16_to_cpu(*--is->next) - << (sizeof(is->bitbuf) * 8 - is->bitsleft - 16); - is->bitsleft += 16; + if (UNALIGNED_ACCESS_IS_FAST && CPU_IS_LITTLE_ENDIAN && + WORDSIZE == 8 && likely((u8 *)is->next - (u8 *)is->begin >= 8)) + { + is->next -= avail >> 4; + is->bitbuf |= load_u64_unaligned(is->next) << (avail & 15); + is->bitsleft += avail & ~15; + } else { + if (likely(is->next != is->begin)) + is->bitbuf |= (bitbuf_t)le16_to_cpu(*--is->next) + << (avail - 16); + if (likely(is->next != is->begin)) + is->bitbuf |=(bitbuf_t)le16_to_cpu(*--is->next) + << (avail - 32); + is->bitsleft += 32; + } } /* Get @num_bits bits from the bitbuffer variable. */ static inline bitbuf_t lzms_peek_bits(struct lzms_input_bitstream *is, unsigned num_bits) { - return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1); + return (is->bitbuf >> 1) >> (BITBUF_NBITS - num_bits - 1); } /* Remove @num_bits bits from the bitbuffer variable. */ @@ -471,16 +477,25 @@ lzms_range_decoder_init(struct lzms_range_decoder *rd, } /* - * Decode and return the next bit from the range decoder. - * - * @prob is the probability out of LZMS_PROBABILITY_DENOMINATOR that the next - * bit is 0 rather than 1. + * Decode a bit using the range coder. The current state specifies the + * probability entry to use. The state and probability entry will be updated + * based on the decoded bit. */ static inline int -lzms_range_decode_bit(struct lzms_range_decoder *rd, u32 prob) +lzms_decode_bit(struct lzms_range_decoder *rd, u32 *state_p, u32 num_states, + struct lzms_probability_entry *probs) { + struct lzms_probability_entry *prob_entry; + u32 prob; u32 bound; + /* Load the probability entry corresponding to the current state. */ + prob_entry = &probs[*state_p]; + + /* Get the probability (out of LZMS_PROBABILITY_DENOMINATOR) that the + * next bit is 0. */ + prob = lzms_get_probability(prob_entry); + /* Normalize if needed. */ if (rd->range <= 0xffff) { rd->range <<= 16; @@ -496,44 +511,23 @@ lzms_range_decode_bit(struct lzms_range_decoder *rd, u32 prob) if (rd->code < bound) { /* Current code is in the 0-bit region of the range. */ rd->range = bound; + + /* Update the state and probability entry based on the decoded bit. */ + *state_p = ((*state_p << 1) | 0) & (num_states - 1); + lzms_update_probability_entry(prob_entry, 0); return 0; } else { /* Current code is in the 1-bit region of the range. */ rd->range -= bound; rd->code -= bound; + + /* Update the state and probability entry based on the decoded bit. */ + *state_p = ((*state_p << 1) | 1) & (num_states - 1); + lzms_update_probability_entry(prob_entry, 1); return 1; } } -/* - * Decode a bit. This wraps around lzms_range_decode_bit() to handle using and - * updating the state and its corresponding probability entry. - */ -static inline int -lzms_decode_bit(struct lzms_range_decoder *rd, u32 *state_p, u32 num_states, - struct lzms_probability_entry *probs) -{ - struct lzms_probability_entry *prob_entry; - u32 prob; - int bit; - - /* Load the probability entry corresponding to the current state. */ - prob_entry = &probs[*state_p]; - - /* Get the probability that the next bit is 0. */ - prob = lzms_get_probability(prob_entry); - - /* Decode the next bit. */ - bit = lzms_range_decode_bit(rd, prob); - - /* Update the state and probability entry based on the decoded bit. */ - *state_p = ((*state_p << 1) | bit) & (num_states - 1); - lzms_update_probability_entry(prob_entry, bit); - - /* Return the decoded bit. */ - return bit; -} - static int lzms_decode_main_bit(struct lzms_decompressor *d) { @@ -582,13 +576,13 @@ lzms_build_huffman_code(struct lzms_huffman_rebuild_info *rebuild_info) make_canonical_huffman_code(rebuild_info->num_syms, LZMS_MAX_CODEWORD_LENGTH, rebuild_info->freqs, - rebuild_info->lens, + (u8 *)rebuild_info->decode_table, rebuild_info->codewords); make_huffman_decode_table(rebuild_info->decode_table, rebuild_info->num_syms, rebuild_info->table_bits, - rebuild_info->lens, + (u8 *)rebuild_info->decode_table, LZMS_MAX_CODEWORD_LENGTH); rebuild_info->num_syms_until_rebuild = rebuild_info->rebuild_freq; @@ -597,13 +591,12 @@ lzms_build_huffman_code(struct lzms_huffman_rebuild_info *rebuild_info) static void lzms_init_huffman_code(struct lzms_huffman_rebuild_info *rebuild_info, unsigned num_syms, unsigned rebuild_freq, - u32 *codewords, u8 *lens, u32 *freqs, + u32 *codewords, u32 *freqs, u16 *decode_table, unsigned table_bits) { rebuild_info->num_syms = num_syms; rebuild_info->rebuild_freq = rebuild_freq; rebuild_info->codewords = codewords; - rebuild_info->lens = lens; rebuild_info->freqs = freqs; rebuild_info->decode_table = decode_table; rebuild_info->table_bits = table_bits; @@ -740,23 +733,23 @@ lzms_decode_items(struct lzms_decompressor * const restrict d, u32 offset; u32 length; - if (d->pending_lz_offset != 0 && - out_next != d->lz_offset_still_pending) - { - BUILD_BUG_ON(LZMS_NUM_LZ_REPS != 3); - d->recent_lz_offsets[3] = d->recent_lz_offsets[2]; - d->recent_lz_offsets[2] = d->recent_lz_offsets[1]; - d->recent_lz_offsets[1] = d->recent_lz_offsets[0]; - d->recent_lz_offsets[0] = d->pending_lz_offset; - d->pending_lz_offset = 0; - } - if (!lzms_decode_lz_bit(d)) { /* Explicit offset */ offset = lzms_decode_lz_offset(d); } else { /* Repeat offset */ + if (d->pending_lz_offset != 0 && + out_next != d->lz_offset_still_pending) + { + BUILD_BUG_ON(LZMS_NUM_LZ_REPS != 3); + d->recent_lz_offsets[3] = d->recent_lz_offsets[2]; + d->recent_lz_offsets[2] = d->recent_lz_offsets[1]; + d->recent_lz_offsets[1] = d->recent_lz_offsets[0]; + d->recent_lz_offsets[0] = d->pending_lz_offset; + d->pending_lz_offset = 0; + } + BUILD_BUG_ON(LZMS_NUM_LZ_REPS != 3); if (!lzms_decode_lz_rep_bit(d, 0)) { offset = d->recent_lz_offsets[0]; @@ -805,17 +798,6 @@ lzms_decode_items(struct lzms_decompressor * const restrict d, const u8 *matchptr; u32 length; - if (d->pending_delta_pair != 0 && - out_next != d->delta_pair_still_pending) - { - BUILD_BUG_ON(LZMS_NUM_DELTA_REPS != 3); - d->recent_delta_pairs[3] = d->recent_delta_pairs[2]; - d->recent_delta_pairs[2] = d->recent_delta_pairs[1]; - d->recent_delta_pairs[1] = d->recent_delta_pairs[0]; - d->recent_delta_pairs[0] = d->pending_delta_pair; - d->pending_delta_pair = 0; - } - if (!lzms_decode_delta_bit(d)) { /* Explicit offset */ power = lzms_decode_delta_power(d); @@ -824,6 +806,17 @@ lzms_decode_items(struct lzms_decompressor * const restrict d, /* Repeat offset */ u64 val; + if (d->pending_delta_pair != 0 && + out_next != d->delta_pair_still_pending) + { + BUILD_BUG_ON(LZMS_NUM_DELTA_REPS != 3); + d->recent_delta_pairs[3] = d->recent_delta_pairs[2]; + d->recent_delta_pairs[2] = d->recent_delta_pairs[1]; + d->recent_delta_pairs[1] = d->recent_delta_pairs[0]; + d->recent_delta_pairs[0] = d->pending_delta_pair; + d->pending_delta_pair = 0; + } + BUILD_BUG_ON(LZMS_NUM_DELTA_REPS != 3); if (!lzms_decode_delta_rep_bit(d, 0)) { val = d->recent_delta_pairs[0]; @@ -934,7 +927,6 @@ lzms_init_decompressor(struct lzms_decompressor *d, const void *in, LZMS_NUM_LITERAL_SYMS, LZMS_LITERAL_CODE_REBUILD_FREQ, d->codewords, - d->lens, d->literal_freqs, d->literal_decode_table, LZMS_LITERAL_TABLEBITS); @@ -943,7 +935,6 @@ lzms_init_decompressor(struct lzms_decompressor *d, const void *in, num_offset_slots, LZMS_LZ_OFFSET_CODE_REBUILD_FREQ, d->codewords, - d->lens, d->lz_offset_freqs, d->lz_offset_decode_table, LZMS_LZ_OFFSET_TABLEBITS); @@ -952,7 +943,6 @@ lzms_init_decompressor(struct lzms_decompressor *d, const void *in, LZMS_NUM_LENGTH_SYMS, LZMS_LENGTH_CODE_REBUILD_FREQ, d->codewords, - d->lens, d->length_freqs, d->length_decode_table, LZMS_LENGTH_TABLEBITS); @@ -961,7 +951,6 @@ lzms_init_decompressor(struct lzms_decompressor *d, const void *in, num_offset_slots, LZMS_DELTA_OFFSET_CODE_REBUILD_FREQ, d->codewords, - d->lens, d->delta_offset_freqs, d->delta_offset_decode_table, LZMS_DELTA_OFFSET_TABLEBITS); @@ -970,7 +959,6 @@ lzms_init_decompressor(struct lzms_decompressor *d, const void *in, LZMS_NUM_DELTA_POWER_SYMS, LZMS_DELTA_POWER_CODE_REBUILD_FREQ, d->codewords, - d->lens, d->delta_power_freqs, d->delta_power_decode_table, LZMS_DELTA_POWER_TABLEBITS);