]> wimlib.net Git - wimlib/blobdiff - include/wimlib/compress.h
Fix reading > 16 bits from bitstream
[wimlib] / include / wimlib / compress.h
index 60b4c0eaceafc122a8ee06eee335ba8acc2faf6d..fcd3c4119f16c9e37298a87760ddbd99b56186b3 100644 (file)
@@ -1,67 +1,73 @@
 /*
  * compress.h
  *
- * Functions useful for compression, mainly bitstreams.
+ * Header for compression code shared by multiple compression formats.
  */
 
 #ifndef _WIMLIB_COMPRESS_H
 #define _WIMLIB_COMPRESS_H
 
-#include "wimlib/endianness.h"
 #include "wimlib/types.h"
 
-typedef u16 output_bitbuf_t;
+/* Variable type that can represent all possible window positions.  */
+#ifndef INPUT_IDX_T_DEFINED
+#define INPUT_IDX_T_DEFINED
+typedef u32 input_idx_t;
+#endif
 
-/* Assuming that WIM chunks are at most 32768 bytes, 16 bits is enough for any
- * symbol frequency. */
-typedef u16 freq_t;
-
-/* Structure to keep track of the current position in the compressed output. */
+/* Structure to keep track of the current state of sending bits and bytes to
+ * the compressed output buffer.  */
 struct output_bitstream {
 
-       /* A variable to buffer writing bits to the output and is flushed to the
-        * compressed output when full. */
-       output_bitbuf_t bitbuf;
+       /* Variable that holds up to 16 bits that haven't yet been flushed to
+        * the output.  */
+       u16 bitbuf;
 
-       /* Number of free bits in @bitbuf */
+       /* Number of free bits in @bitbuf; that is, 16 minus the number of valid
+        * bits in @bitbuf.  */
        unsigned free_bits;
 
+       /* Pointer to the start of the output buffer.  */
+       u8 *output_start;
+
+       /* Position at which to write the next 16 bits.  */
        u8 *bit_output;
+
+       /* Next position to write 16 bits, after they are written to @bit_output.
+        * This is after @bit_output and may be separated from @bit_output
+        * by literal bytes.  */
        u8 *next_bit_output;
 
-       /* Pointer to the next byte in the compressed output. */
+       /* Next position to write literal bytes.  This is after @bit_output and
+        * @next_bit_output, and may be separated from them by literal bytes.
+        */
        u8 *output;
 
+       /* Number of bytes remaining in the @output buffer.  */
+       input_idx_t bytes_remaining;
 
-       /* Number of bytes left in the memory pointed to by @output. */
-       int num_bytes_remaining;
+       /* Set to true if the buffer has been exhausted.  */
+       bool overrun;
 };
 
-static inline int
-bitstream_put_byte(struct output_bitstream *ostream, u8 n)
-{
-       if (ostream->num_bytes_remaining < 1)
-               return 1;
-       *ostream->output = n;
-       ostream->output++;
-       ostream->num_bytes_remaining--;
-       return 0;
-}
-
-static inline int
-bitstream_put_two_bytes(struct output_bitstream *ostream, u16 n)
-{
-       if (ostream->num_bytes_remaining < 2)
-               return 1;
-       *(u16*)ostream->output = cpu_to_le16(n);
-       ostream->output += 2;
-       ostream->num_bytes_remaining -= 2;
-       return 0;
-}
+extern void
+init_output_bitstream(struct output_bitstream *ostream,
+                     void *data, unsigned num_bytes);
+
+extern input_idx_t
+flush_output_bitstream(struct output_bitstream *ostream);
+
+extern void
+bitstream_put_bits(struct output_bitstream *ostream,
+                  u32 bits, unsigned num_bits);
+
+extern void
+bitstream_put_byte(struct output_bitstream *ostream, u8 n);
 
 struct lz_params {
        unsigned min_match;
        unsigned max_match;
+       unsigned max_offset;
        unsigned nice_match;
        unsigned good_match;
        unsigned max_chain_len;
@@ -69,35 +75,23 @@ struct lz_params {
        unsigned too_far;
 };
 
-typedef unsigned (*lz_record_match_t)(unsigned, unsigned, void *, void *);
-typedef unsigned (*lz_record_literal_t)(u8, void *);
+typedef void (*lz_record_match_t)(unsigned len, unsigned offset, void *ctx);
+typedef void (*lz_record_literal_t)(u8 lit, void *ctx);
 
-extern unsigned
-lz_analyze_block(const u8 uncompressed_data[],
-                unsigned uncompressed_len,
-                u32 match_tab[],
+extern void
+lz_analyze_block(const u8 window[restrict],
+                input_idx_t window_size,
                 lz_record_match_t record_match,
                 lz_record_literal_t record_literal,
-                void *record_match_arg1,
-                void *record_match_arg2,
-                void *record_literal_arg,
-                const struct lz_params *params);
-
-extern int bitstream_put_bits(struct output_bitstream *ostream,
-                             output_bitbuf_t bits, unsigned num_bits);
-
-extern void
-init_output_bitstream(struct output_bitstream *ostream,
-                     void *data, unsigned num_bytes);
-
-extern int
-flush_output_bitstream(struct output_bitstream *ostream);
+                void *record_ctx,
+                const struct lz_params *params,
+                input_idx_t prev_tab[restrict]);
 
 extern void
 make_canonical_huffman_code(unsigned num_syms,
                            unsigned max_codeword_len,
-                           const freq_t freq_tab[],
-                           u8 lens[],
-                           u16 codewords[]);
+                           const input_idx_t freq_tab[restrict],
+                           u8 lens[restrict],
+                           u16 codewords[restrict]);
 
 #endif /* _WIMLIB_COMPRESS_H */