From: Eric Biggers Date: Fri, 13 Dec 2013 17:15:19 +0000 (-0600) Subject: Fix reading > 16 bits from bitstream X-Git-Tag: v1.6.0~153 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=35ff480326a41f7f74fe0a497c636a811922f276 Fix reading > 16 bits from bitstream --- diff --git a/include/wimlib/compress.h b/include/wimlib/compress.h index 5f259e83..fcd3c411 100644 --- a/include/wimlib/compress.h +++ b/include/wimlib/compress.h @@ -7,26 +7,24 @@ #ifndef _WIMLIB_COMPRESS_H #define _WIMLIB_COMPRESS_H -#include "wimlib/endianness.h" #include "wimlib/types.h" -typedef u16 output_bitbuf_t; - /* Variable type that can represent all possible window positions. */ -typedef u32 freq_t; #ifndef INPUT_IDX_T_DEFINED #define INPUT_IDX_T_DEFINED typedef u32 input_idx_t; #endif -/* Structure to keep track of the current position in the compressed output. */ +/* Structure to keep track of the current state sending bits and bytes to the + * compressed output buffer. */ struct output_bitstream { - /* A variable to buffer writing bits to the output and is flushed to the - * compressed output when full. */ - output_bitbuf_t bitbuf; + /* Variable that holds up to 16 bits that haven't yet been flushed to + * the output. */ + u16 bitbuf; - /* Number of free bits in @bitbuf */ + /* Number of free bits in @bitbuf; that is, 16 minus the number of valid + * bits in @bitbuf. */ unsigned free_bits; /* Pointer to the start of the output buffer. */ @@ -45,7 +43,7 @@ struct output_bitstream { */ u8 *output; - /* Bytes remaining in @output buffer. */ + /* Number of bytes remaining in the @output buffer. */ input_idx_t bytes_remaining; /* Set to true if the buffer has been exhausted. */ @@ -81,18 +79,18 @@ typedef void (*lz_record_match_t)(unsigned len, unsigned offset, void *ctx); typedef void (*lz_record_literal_t)(u8 lit, void *ctx); extern void -lz_analyze_block(const u8 window[], +lz_analyze_block(const u8 window[restrict], input_idx_t window_size, lz_record_match_t record_match, lz_record_literal_t record_literal, void *record_ctx, const struct lz_params *params, - input_idx_t prev_tab[]); + input_idx_t prev_tab[restrict]); extern void make_canonical_huffman_code(unsigned num_syms, unsigned max_codeword_len, - const freq_t freq_tab[restrict], + const input_idx_t freq_tab[restrict], u8 lens[restrict], u16 codewords[restrict]); diff --git a/include/wimlib/decompress.h b/include/wimlib/decompress.h index b1f534fb..ec8804e6 100644 --- a/include/wimlib/decompress.h +++ b/include/wimlib/decompress.h @@ -13,10 +13,6 @@ #include "wimlib/endianness.h" #include "wimlib/types.h" -/* Must be at least 32 bits. */ -typedef u32 input_bitbuf_t; -#define INPUT_BITBUF_BITS (sizeof(input_bitbuf_t) * 8) - #ifndef INPUT_IDX_T_DEFINED #define INPUT_IDX_T_DEFINED typedef u32 input_idx_t; @@ -32,15 +28,15 @@ struct input_bitstream { /* A variable of length at least 32 bits that is used to hold bits that * have been read from the stream. The bits are ordered from high-order * to low-order, and the next bit is always the high-order bit. */ - input_bitbuf_t bitbuf; - - /* Pointer to the next byte to be retrieved from the input. */ - const u8 *data; + u32 bitbuf; - /* Number of bits in @bitbuf that are valid. */ + /* Number of bits in @bitbuf that are valid. */ unsigned bitsleft; - /* Number of words of data that are left. */ + /* Pointer to the next byte to be retrieved from the input. */ + const u8 *data; + + /* Number of bytes of data that are left. */ input_idx_t data_bytes_left; }; @@ -56,32 +52,36 @@ init_input_bitstream(struct input_bitstream *istream, } /* Ensures that the bit buffer variable for the bitstream contains @num_bits - * bits, which must be 16 or fewer. + * bits. * * If there are at least @num_bits bits remaining in the bitstream, 0 is * returned. Otherwise, -1 is returned. */ static inline int bitstream_ensure_bits(struct input_bitstream *istream, unsigned num_bits) { - wimlib_assert2(num_bits <= 16); + for (int nbits = num_bits; (int)istream->bitsleft < nbits; nbits -= 16) { + u16 nextword; + unsigned shift; - if (istream->bitsleft >= num_bits) - return 0; + if (unlikely(istream->data_bytes_left < 2)) + return -1; - if (unlikely(istream->data_bytes_left < 2)) - return -1; + wimlib_assert2(istream->bitsleft <= sizeof(istream->bitbuf) * 8 - 16); - istream->bitbuf |= le16_to_cpu(*(le16*)istream->data) << - (INPUT_BITBUF_BITS - 16 - istream->bitsleft); - istream->data += 2; - istream->bitsleft += 16; - istream->data_bytes_left -= 2; + nextword = le16_to_cpu(*(const le16*)istream->data); + shift = sizeof(istream->bitbuf) * 8 - 16 - istream->bitsleft; + istream->bitbuf |= (u32)nextword << shift; + istream->data += 2; + istream->bitsleft += 16; + istream->data_bytes_left -= 2; + + } return 0; } /* Returns the next @num_bits bits in the buffer variable, which must contain at * least @num_bits bits, for the bitstream. */ -static inline unsigned +static inline u32 bitstream_peek_bits(const struct input_bitstream *istream, unsigned num_bits) { wimlib_assert2(istream->bitsleft >= num_bits); @@ -89,7 +89,7 @@ bitstream_peek_bits(const struct input_bitstream *istream, unsigned num_bits) if (unlikely(num_bits == 0)) return 0; - return istream->bitbuf >> (INPUT_BITBUF_BITS - num_bits); + return istream->bitbuf >> (sizeof(istream->bitbuf) * 8 - num_bits); } /* Removes @num_bits bits from the buffer variable, which must contain at least @@ -105,21 +105,19 @@ bitstream_remove_bits(struct input_bitstream *istream, unsigned num_bits) /* Gets and removes @num_bits bits from the buffer variable, which must contain * at least @num_bits bits, for the bitstream. */ -static inline unsigned -bitstream_pop_bits(struct input_bitstream *istream, - unsigned num_bits) +static inline u32 +bitstream_pop_bits(struct input_bitstream *istream, unsigned num_bits) { - unsigned n = bitstream_peek_bits(istream, num_bits); + u32 n = bitstream_peek_bits(istream, num_bits); bitstream_remove_bits(istream, num_bits); return n; } -/* Reads @num_bits bits from the input bitstream. @num_bits must be 16 or - * fewer. On success, returns 0 and returns the requested bits in @n. If there - * are fewer than @num_bits remaining in the bitstream, -1 is returned. */ +/* Reads @num_bits bits from the input bitstream. On success, returns 0 and + * returns the requested bits in @n. If there are fewer than @num_bits + * remaining in the bitstream, -1 is returned. */ static inline int -bitstream_read_bits(struct input_bitstream *istream, - unsigned num_bits, unsigned *n) +bitstream_read_bits(struct input_bitstream *istream, unsigned num_bits, u32 *n) { if (unlikely(bitstream_ensure_bits(istream, num_bits))) return -1; @@ -142,10 +140,10 @@ bitstream_read_byte(struct input_bitstream *istream) /* Reads @num_bits bits from the buffer variable for a bistream without checking * to see if that many bits are in the buffer or not. */ -static inline unsigned +static inline u32 bitstream_read_bits_nocheck(struct input_bitstream *istream, unsigned num_bits) { - unsigned n = bitstream_peek_bits(istream, num_bits); + u32 n = bitstream_peek_bits(istream, num_bits); bitstream_remove_bits(istream, num_bits); return n; } diff --git a/src/compress.c b/src/compress.c index 76043d63..3a936347 100644 --- a/src/compress.c +++ b/src/compress.c @@ -28,6 +28,7 @@ #endif #include "wimlib/assert.h" +#include "wimlib/endianness.h" #include "wimlib/compiler.h" #include "wimlib/compress.h" #include "wimlib/util.h" @@ -126,7 +127,7 @@ init_output_bitstream(struct output_bitstream *ostream, } typedef struct { - freq_t freq; + input_idx_t freq; u16 sym; union { u16 path_len; @@ -250,7 +251,7 @@ huffman_tree_compute_path_lengths(HuffmanNode *base_node, u16 cur_len) void make_canonical_huffman_code(unsigned num_syms, unsigned max_codeword_len, - const freq_t freq_tab[restrict], + const input_idx_t freq_tab[restrict], u8 lens[restrict], u16 codewords[restrict]) { diff --git a/src/lz77.c b/src/lz77.c index 049b3225..3b66dee8 100644 --- a/src/lz77.c +++ b/src/lz77.c @@ -198,13 +198,13 @@ longest_match(const u8 window[], unsigned bytes_remaining, * @prev_tab: Temporary space containing least @window_size elements. */ void -lz_analyze_block(const u8 window[], +lz_analyze_block(const u8 window[restrict], input_idx_t window_size, lz_record_match_t record_match, lz_record_literal_t record_literal, void *record_ctx, const struct lz_params *params, - input_idx_t prev_tab[]) + input_idx_t prev_tab[restrict]) { unsigned cur_input_pos = 0; unsigned hash = 0; diff --git a/src/lzx-compress.c b/src/lzx-compress.c index 04e5fbfb..ae1cf152 100644 --- a/src/lzx-compress.c +++ b/src/lzx-compress.c @@ -190,6 +190,7 @@ #include "wimlib.h" #include "wimlib/compress.h" +#include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/lzx.h" #include "wimlib/util.h" @@ -249,9 +250,9 @@ struct lzx_codes { /* Tables for tallying symbol frequencies in the three LZX alphabets */ struct lzx_freqs { - freq_t main[LZX_MAINCODE_MAX_NUM_SYMBOLS]; - freq_t len[LZX_LENCODE_NUM_SYMBOLS]; - freq_t aligned[LZX_ALIGNEDCODE_NUM_SYMBOLS]; + input_idx_t main[LZX_MAINCODE_MAX_NUM_SYMBOLS]; + input_idx_t len[LZX_LENCODE_NUM_SYMBOLS]; + input_idx_t aligned[LZX_ALIGNEDCODE_NUM_SYMBOLS]; }; /* LZX intermediate match/literal format */ @@ -654,7 +655,7 @@ static unsigned lzx_build_precode(const u8 lens[restrict], const u8 prev_lens[restrict], const unsigned num_syms, - freq_t precode_freqs[restrict LZX_PRECODE_NUM_SYMBOLS], + input_idx_t precode_freqs[restrict LZX_PRECODE_NUM_SYMBOLS], u8 output_syms[restrict num_syms], u8 precode_lens[restrict LZX_PRECODE_NUM_SYMBOLS], u16 precode_codewords[restrict LZX_PRECODE_NUM_SYMBOLS], @@ -813,7 +814,7 @@ lzx_write_compressed_code(struct output_bitstream *out, const u8 prev_lens[restrict], unsigned num_syms) { - freq_t precode_freqs[LZX_PRECODE_NUM_SYMBOLS]; + input_idx_t precode_freqs[LZX_PRECODE_NUM_SYMBOLS]; u8 output_syms[num_syms]; u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS]; u16 precode_codewords[LZX_PRECODE_NUM_SYMBOLS]; diff --git a/src/lzx-decompress.c b/src/lzx-decompress.c index 4047f956..ec8ccf25 100644 --- a/src/lzx-decompress.c +++ b/src/lzx-decompress.c @@ -210,7 +210,7 @@ lzx_read_code_lens(struct input_bitstream *istream, u8 lens[], _aligned_attribute(DECODE_TABLE_ALIGNMENT); u8 pretree_lens[LZX_PRECODE_NUM_SYMBOLS]; unsigned i; - unsigned len; + u32 len; int ret; /* Read the code lengths of the pretree codes. There are 20 lengths of @@ -244,9 +244,9 @@ lzx_read_code_lens(struct input_bitstream *istream, u8 lens[], * the next lengths are all equal to the next symbol in the * input. */ unsigned tree_code; - unsigned num_zeroes; + u32 num_zeroes; unsigned code; - unsigned num_same; + u32 num_same; signed char value; ret = read_huffsym_using_pretree(istream, pretree_decode_table, @@ -335,15 +335,10 @@ lzx_read_block_header(struct input_bitstream *istream, int ret; unsigned block_type; unsigned block_size; - unsigned s; - unsigned i; - unsigned len; ret = bitstream_ensure_bits(istream, 4); - if (ret) { - LZX_DEBUG("LZX input stream overrun"); + if (ret) return ret; - } /* The first three bits tell us what kind of block it is, and are one * of the LZX_BLOCKTYPE_* values. */ @@ -352,11 +347,10 @@ lzx_read_block_header(struct input_bitstream *istream, /* Read the block size. This mirrors the behavior * lzx_write_compressed_block() in lzx-compress.c; see that for more * details. */ - s = bitstream_read_bits_nocheck(istream, 1); - if (s) { + if (bitstream_read_bits_nocheck(istream, 1)) { block_size = LZX_DEFAULT_BLOCK_SIZE; } else { - unsigned tmp; + u32 tmp; block_size = 0; ret = bitstream_read_bits(istream, 8, &tmp); @@ -384,7 +378,9 @@ lzx_read_block_header(struct input_bitstream *istream, /* Read the path lengths for the elements of the aligned tree, * then build it. */ - for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) { + for (unsigned i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) { + u32 len; + ret = bitstream_read_bits(istream, LZX_ALIGNEDCODE_ELEMENT_SIZE, &len); @@ -564,8 +560,8 @@ lzx_decode_match(unsigned main_element, int block_type, unsigned match_offset; unsigned additional_len; unsigned num_extra_bits; - unsigned verbatim_bits; - unsigned aligned_bits; + u32 verbatim_bits; + u32 aligned_bits; unsigned i; int ret; u8 *match_dest; diff --git a/src/xpress-compress.c b/src/xpress-compress.c index 4b246949..ad9dcff7 100644 --- a/src/xpress-compress.c +++ b/src/xpress-compress.c @@ -99,7 +99,7 @@ xpress_write_matches_and_literals(struct output_bitstream *ostream, } struct xpress_record_ctx { - freq_t freqs[XPRESS_NUM_SYMBOLS]; + input_idx_t freqs[XPRESS_NUM_SYMBOLS]; struct xpress_match *matches; };