From: Eric Biggers Date: Sun, 16 Aug 2015 17:00:48 +0000 (-0500) Subject: LZMS: more bit decoding/encoding optimizations X-Git-Tag: v1.8.2~9 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=5472f62361eb32dcb357a6b90ddc3f2481b68774 LZMS: more bit decoding/encoding optimizations --- diff --git a/include/wimlib/lzms_common.h b/include/wimlib/lzms_common.h index 69b6e67b..093e50d2 100644 --- a/include/wimlib/lzms_common.h +++ b/include/wimlib/lzms_common.h @@ -76,11 +76,34 @@ lzms_update_probability_entry(struct lzms_probability_entry *entry, int bit) { BUILD_BUG_ON(LZMS_PROBABILITY_DENOMINATOR != sizeof(entry->recent_bits) * 8); - s32 delta_zero_bits = (s32)(entry->recent_bits >> - (LZMS_PROBABILITY_DENOMINATOR - 1)) - bit; - - entry->num_recent_zero_bits += delta_zero_bits; - entry->recent_bits = (entry->recent_bits << 1) | bit; +#ifdef __x86_64__ + if (__builtin_constant_p(bit)) { + /* Optimized implementation for x86_64 using carry flag */ + if (bit) { + __asm__("shlq %[recent_bits] \n" + "adcl $0xffffffff, %[num_recent_zero_bits] \n" + "orq $0x1, %[recent_bits] \n" + : [recent_bits] "+r" (entry->recent_bits), + [num_recent_zero_bits] "+mr" (entry->num_recent_zero_bits) + : + : "cc"); + } else { + __asm__("shlq %[recent_bits] \n" + "adcl $0x0, %[num_recent_zero_bits] \n" + : [recent_bits] "+m" (entry->recent_bits), + [num_recent_zero_bits] "+mr" (entry->num_recent_zero_bits) + : + : "cc"); + } + } else +#endif + { + s32 delta_zero_bits = (s32)(entry->recent_bits >> + (LZMS_PROBABILITY_DENOMINATOR - 1)) - bit; + + entry->num_recent_zero_bits += delta_zero_bits; + entry->recent_bits = (entry->recent_bits << 1) | bit; + } } /* Given a probability entry, return the chance out of @@ -91,10 +114,19 @@ lzms_get_probability(const struct lzms_probability_entry *prob_entry) u32 prob = prob_entry->num_recent_zero_bits; /* 0% and 100% probabilities aren't allowed. */ - if (prob == 0) - prob++; - else if (prob == LZMS_PROBABILITY_DENOMINATOR) - prob--; + + /* + * if (prob == 0) + * prob++; + */ + prob += (prob - 1) >> 31; + + /* + * if (prob == LZMS_PROBABILITY_DENOMINATOR) + * prob--; + */ + prob -= (prob >> LZMS_PROBABILITY_BITS); + return prob; } diff --git a/src/lzms_decompress.c b/src/lzms_decompress.c index d5d51417..5093a3d3 100644 --- a/src/lzms_decompress.c +++ b/src/lzms_decompress.c @@ -463,18 +463,22 @@ lzms_decode_bit(struct lzms_range_decoder *rd, u32 *state_p, u32 num_states, /* Load the probability entry corresponding to the current state. */ prob_entry = &probs[*state_p]; + /* Update the state early. We'll still need to OR the state with 1 + * later if the decoded bit is a 1. */ + *state_p = (*state_p << 1) & (num_states - 1); + + /* Get the probability (out of LZMS_PROBABILITY_DENOMINATOR) that the + * next bit is 0. */ + prob = lzms_get_probability(prob_entry); + /* Normalize if needed. */ - if (rd->range <= 0xffff) { + if (!(rd->range & 0xFFFF0000)) { rd->range <<= 16; rd->code <<= 16; if (likely(rd->next != rd->end)) rd->code |= le16_to_cpu(*rd->next++); } - /* Get the probability (out of LZMS_PROBABILITY_DENOMINATOR) that the - * next bit is 0. */ - prob = lzms_get_probability(prob_entry); - /* Based on the probability, calculate the bound between the 0-bit * region and the 1-bit region of the range. */ bound = (rd->range >> LZMS_PROBABILITY_BITS) * prob; @@ -484,7 +488,6 @@ lzms_decode_bit(struct lzms_range_decoder *rd, u32 *state_p, u32 num_states, rd->range = bound; /* Update the state and probability entry based on the decoded bit. */ - *state_p = ((*state_p << 1) | 0) & (num_states - 1); lzms_update_probability_entry(prob_entry, 0); return 0; } else { @@ -493,8 +496,8 @@ lzms_decode_bit(struct lzms_range_decoder *rd, u32 *state_p, u32 num_states, rd->code -= bound; /* Update the state and probability entry based on the decoded bit. */ - *state_p = ((*state_p << 1) | 1) & (num_states - 1); lzms_update_probability_entry(prob_entry, 1); + *state_p |= 1; return 1; } }