wimlib.net Git - wimlib/blobdiff - include/wimlib/decompress_common.h
LZX, XPRESS: Use optimized write_bits() functions
[wimlib] / include / wimlib / decompress_common.h
index fab3c3e009413c4ef9ed4b9f0a4a6a3a672cc1a9..856c64117c6cf432cbdb3be761a0ea2f3433f70a 100644 (file)
@@ -54,17 +54,38 @@ init_input_bitstream(struct input_bitstream *istream,
  *
  * If the input data is exhausted, any further bits are assumed to be 0.  */
 static inline void
-bitstream_ensure_bits(struct input_bitstream *istream, unsigned num_bits)
+bitstream_ensure_bits(struct input_bitstream *istream, const unsigned num_bits)
 {
-       for (int nbits = num_bits; (int)istream->bitsleft < nbits; nbits -= 16) {
-               u16 nextword;
-               unsigned shift;
+       u16 nextword;
+       unsigned shift;
 
+       /* This currently works for at most 17 bits.  */
+       wimlib_assert2(num_bits <= 17);
+
+       if (istream->bitsleft >= num_bits)
+               return;
+
+       if (unlikely(istream->data_bytes_left < 2)) {
+               istream->bitsleft = num_bits;
+               return;
+       }
+
+       nextword = le16_to_cpu(*(const le16*)istream->data);
+       shift = sizeof(istream->bitbuf) * 8 - 16 - istream->bitsleft;
+       istream->bitbuf |= (u32)nextword << shift;
+       istream->data += 2;
+       istream->bitsleft += 16;
+       istream->data_bytes_left -= 2;
+
+       /* Help the compiler: If it's known at compile-time that num_bits <= 16,
+        * a second word will never be needed.  */
+       if (!(is_constant(num_bits) && num_bits <= 16) &&
+           unlikely(istream->bitsleft < num_bits))
+       {
                if (unlikely(istream->data_bytes_left < 2)) {
                        istream->bitsleft = num_bits;
                        return;
                }
-
                nextword = le16_to_cpu(*(const le16*)istream->data);
                shift = sizeof(istream->bitbuf) * 8 - 16 - istream->bitsleft;
                istream->bitbuf |= (u32)nextword << shift;
@@ -209,7 +230,7 @@ make_huffman_decode_table(u16 decode_table[], unsigned num_syms,
  * This function won't write any data beyond this position.
  */
 static inline void
-lz_copy(u8 *dst, unsigned length, unsigned offset, const u8 *winend)
+lz_copy(u8 *dst, u32 length, u32 offset, const u8 *winend)
 {
        const u8 *src = dst - offset;
 #if defined(__x86_64__) || defined(__i386__)