#endif
#include "wimlib/assert.h"
+#include "wimlib/endianness.h"
#include "wimlib/compiler.h"
#include "wimlib/compress.h"
#include "wimlib/util.h"
/* Writes @num_bits bits, given by the @num_bits least significant bits of
* @bits, to the output @ostream. */
void
-bitstream_put_bits(struct output_bitstream *ostream, output_bitbuf_t bits,
+bitstream_put_bits(struct output_bitstream *ostream, u32 bits,
unsigned num_bits)
{
- unsigned rem_bits;
+ bits &= (1U << num_bits) - 1;
+ while (num_bits > ostream->free_bits) {
+ /* Buffer variable does not have space for the new bits. It
+ * needs to be flushed as a 16-bit integer. Bits in the second
+ * byte logically precede those in the first byte
+ * (little-endian), but within each byte the bits are ordered
+ * from high to low. This is true for both XPRESS and LZX
+ * compression. */
+
+ /* There must be at least 2 bytes of space remaining. */
+ if (unlikely(ostream->bytes_remaining < 2)) {
+ ostream->overrun = true;
+ return;
+ }
- if (num_bits <= ostream->free_bits) {
- /* Buffer variable had space for the new bits. */
- ostream->bitbuf = (ostream->bitbuf << num_bits) | bits;
- ostream->free_bits -= num_bits;
- return;
- }
+ /* Fill the buffer with as many bits that fit. */
+ unsigned fill_bits = ostream->free_bits;
- /* Buffer variable does not have space for the new bits. It needs to be
- * flushed as a 16-bit integer. Bits in the second byte logically
- * precede those in the first byte (little-endian), but within each byte
- * the bits are ordered from high to low. This is true for both XPRESS
- * and LZX compression. */
+ ostream->bitbuf <<= fill_bits;
+ ostream->bitbuf |= bits >> (num_bits - fill_bits);
- wimlib_assert(num_bits <= 16);
+ *(le16*)ostream->bit_output = cpu_to_le16(ostream->bitbuf);
+ ostream->bit_output = ostream->next_bit_output;
+ ostream->next_bit_output = ostream->output;
+ ostream->output += 2;
+ ostream->bytes_remaining -= 2;
- /* There must be at least 2 bytes of space remaining. */
- if (unlikely(ostream->bytes_remaining < 2)) {
- ostream->overrun = true;
- return;
+ ostream->free_bits = 16;
+ num_bits -= fill_bits;
+ bits &= (1U << num_bits) - 1;
}
- /* Fill the buffer with as many bits that fit. */
- rem_bits = num_bits - ostream->free_bits;
- ostream->bitbuf <<= ostream->free_bits;
- ostream->bitbuf |= bits >> rem_bits;
-
- *(le16*)ostream->bit_output = cpu_to_le16(ostream->bitbuf);
- ostream->bit_output = ostream->next_bit_output;
- ostream->next_bit_output = ostream->output;
- ostream->output += 2;
- ostream->bytes_remaining -= 2;
-
- ostream->free_bits = 16 - rem_bits;
- ostream->bitbuf = bits;
+ /* Buffer variable has space for the new bits. */
+ ostream->bitbuf = (ostream->bitbuf << num_bits) | bits;
+ ostream->free_bits -= num_bits;
}
void
ostream->bit_output = data;
ostream->next_bit_output = data + 2;
ostream->output = data + 4;
- ostream->bytes_remaining = num_bytes;
+ ostream->bytes_remaining = num_bytes - 4;
ostream->overrun = false;
}
typedef struct {
- u32 freq;
+ input_idx_t freq;
u16 sym;
union {
u16 path_len;
const HuffmanNode *leaf1 = _leaf1;
const HuffmanNode *leaf2 = _leaf2;
- int freq_diff = (int)leaf1->freq - (int)leaf2->freq;
-
- if (freq_diff == 0)
- return (int)leaf1->sym - (int)leaf2->sym;
+ if (leaf1->freq > leaf2->freq)
+ return 1;
+ else if (leaf1->freq < leaf2->freq)
+ return -1;
else
- return freq_diff;
+ return (int)leaf1->sym - (int)leaf2->sym;
}
/* Comparator function for HuffmanNodes. Sorts primarily by code length and
* @num_syms: The number of symbols in the alphabet.
*
* @max_codeword_len: The maximum allowed length of a codeword in the code.
- * Note that if the code being created runs up against
- * this restriction, the code ultimately created will be
- * suboptimal, although there are some advantages for
- * limiting the length of the codewords.
+ * Note that if the code being created runs up against
+ * this restriction, the code ultimately created will be
+ * suboptimal, although there are some advantages for
+ * limiting the length of the codewords.
*
* @freq_tab: An array of length @num_syms that contains the frequencies
- * of each symbol in the uncompressed data.
+ * of each symbol in the uncompressed data.
*
* @lens: An array of length @num_syms into which the lengths of the
- * codewords for each symbol will be written.
+ * codewords for each symbol will be written.
*
* @codewords: An array of @num_syms short integers into which the
- * codewords for each symbol will be written. The first
- * lens[i] bits of codewords[i] will contain the codeword
- * for symbol i.
+ * codewords for each symbol will be written. The first
+ * lens[i] bits of codewords[i] will contain the codeword
+ * for symbol i.
*/
void
make_canonical_huffman_code(unsigned num_syms,
unsigned max_codeword_len,
- const freq_t freq_tab[restrict],
+ const input_idx_t freq_tab[restrict],
u8 lens[restrict],
u16 codewords[restrict])
{
/* We require at least 2 possible symbols in the alphabet to produce a
* valid Huffman decoding table. It is allowed that fewer than 2 symbols
* are actually used, though. */
- wimlib_assert(num_syms >= 2);
+ wimlib_assert(num_syms >= 2 && num_syms < INVALID_SYMBOL);
/* Initialize the lengths and codewords to 0 */
memset(lens, 0, num_syms * sizeof(lens[0]));
* log_2(num_used_symbols).
* */
for (unsigned i = 0; i < num_used_symbols; i++)
- if (leaves[i].freq > 1)
- leaves[i].freq >>= 1;
+ leaves[i].freq = (leaves[i].freq + 1) >> 1;
+
goto try_building_tree_again;
}
next_inode++;