LZX, XPRESS: Use optimized write_bits() functions

[wimlib] / include / wimlib / decompress_common.h
diff --git a/include/wimlib/decompress_common.h b/include/wimlib/decompress_common.h

index fab3c3e009413c4ef9ed4b9f0a4a6a3a672cc1a9..856c64117c6cf432cbdb3be761a0ea2f3433f70a 100644 (file)
--- a/include/wimlib/decompress_common.h
+++ b/include/wimlib/decompress_common.h
@@ -54,17 +54,38 @@ init_input_bitstream(struct input_bitstream *istream,
   *
   * If the input data is exhausted, any further bits are assumed to be 0.  */
  static inline void
-bitstream_ensure_bits(struct input_bitstream *istream, unsigned num_bits)
+bitstream_ensure_bits(struct input_bitstream *istream, const unsigned num_bits)
  {
-       for (int nbits = num_bits; (int)istream->bitsleft < nbits; nbits -= 16) {
-               u16 nextword;
-               unsigned shift;
+       u16 nextword;
+       unsigned shift;
  
+       /* This currently works for at most 17 bits.  */
+       wimlib_assert2(num_bits <= 17);
+
+       if (istream->bitsleft >= num_bits)
+               return;
+
+       if (unlikely(istream->data_bytes_left < 2)) {
+               istream->bitsleft = num_bits;
+               return;
+       }
+
+       nextword = le16_to_cpu(*(const le16*)istream->data);
+       shift = sizeof(istream->bitbuf) * 8 - 16 - istream->bitsleft;
+       istream->bitbuf |= (u32)nextword << shift;
+       istream->data += 2;
+       istream->bitsleft += 16;
+       istream->data_bytes_left -= 2;
+
+       /* Help the compiler: If it's known at compile-time that num_bits <= 16,
+        * a second word will never be needed.  */
+       if (!(is_constant(num_bits) && num_bits <= 16) &&
+           unlikely(istream->bitsleft < num_bits))
+       {
                 if (unlikely(istream->data_bytes_left < 2)) {
                         istream->bitsleft = num_bits;
                         return;
                 }
-
                 nextword = le16_to_cpu(*(const le16*)istream->data);
                 shift = sizeof(istream->bitbuf) * 8 - 16 - istream->bitsleft;
                 istream->bitbuf |= (u32)nextword << shift;
@@ -209,7 +230,7 @@ make_huffman_decode_table(u16 decode_table[], unsigned num_syms,
   * This function won't write any data beyond this position.
   */
  static inline void
-lz_copy(u8 *dst, unsigned length, unsigned offset, const u8 *winend)
+lz_copy(u8 *dst, u32 length, u32 offset, const u8 *winend)
  {
         const u8 *src = dst - offset;
  #if defined(__x86_64__) || defined(__i386__)