wimlib.net Git - wimlib/blobdiff - src/lzx_compress.c
lzx_compress: use 'restrict' qualifier for in_begin
index 520c07e33572081b7756a9c3622733522569817a..ccac6d275332be373b130df42075077def1c527a 100644 (file)
  * unknown.  In reality, each token in LZX requires a whole number of bits to
  * output.
  */
-#define LZX_BIT_COST           16
+#define LZX_BIT_COST           64
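The scaling factor lets the cost model work in fractional bits: with LZX_BIT_COST at 64, a cost of one bit is stored as 64, so estimates carry six fractional bits of precision. As a rough sketch of the idea (not wimlib code; scaled_bit_cost is a hypothetical helper), turning an estimated symbol probability into a scaled cost could look like this:

	#include <math.h>
	#include <stdint.h>

	#define BIT_COST 64	/* mirrors LZX_BIT_COST above */

	/* Hypothetical helper: ideal entropy cost of a symbol, in 1/64-bit units. */
	static uint32_t
	scaled_bit_cost(double probability)
	{
		return (uint32_t)(-log2(probability) * BIT_COST + 0.5);
	}

	/* scaled_bit_cost(0.5) == 64  (exactly one bit)
	 * scaled_bit_cost(0.1) == 213 (about 3.32 bits) */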
 
 /*
  * Should the compressor take into account the costs of aligned offset symbols?
  * lower than the limits defined by the LZX format.  This does not significantly
  * affect the compression ratio, at least for the block sizes we use.
  */
-#define MAIN_CODEWORD_LIMIT    12      /* 64-bit: can buffer 4 main symbols  */
+#define MAIN_CODEWORD_LIMIT    16      /* 64-bit: can buffer 3 main symbols  */
 #define LENGTH_CODEWORD_LIMIT  12
 #define ALIGNED_CODEWORD_LIMIT 7
 #define PRE_CODEWORD_LIMIT     7
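The interaction between MAIN_CODEWORD_LIMIT and the output buffering shows up again in lzx_write_sequences() below. A rough sketch of the arithmetic, assuming (as the CAN_BUFFER() checks appear to encode) that the bit buffer is a 64-bit word and up to 15 bits can still be pending after a flush:

	/* Sketch, not wimlib code: usable capacity per flush on 64-bit is roughly
	 * 64 - 15 = 49 bits when up to 15 bits are already pending.  Then: */
	_Static_assert(3 * 16 <= 49, "three 16-bit main codewords fit per flush");
	/* ...whereas 4 * 16 = 64 > 49, so four no longer fit once the limit is 16;
	 * with the old limit of 12, 4 * 12 = 48 did fit, hence the old comment
	 * "can buffer 4 main symbols". */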
@@ -586,13 +586,13 @@ lzx_flush_bits(struct lzx_output_bitstream *os, unsigned max_num_bits)
 
        if (os->end - os->next < 6)
                return;
-       put_unaligned_u16_le(os->bitbuf >> ((os->bitcount - 16) &
+       put_unaligned_le16(os->bitbuf >> ((os->bitcount - 16) &
                                            shift_mask), os->next + 0);
        if (max_num_bits > 16)
-               put_unaligned_u16_le(os->bitbuf >> ((os->bitcount - 32) &
+               put_unaligned_le16(os->bitbuf >> ((os->bitcount - 32) &
                                                shift_mask), os->next + 2);
        if (max_num_bits > 32)
-               put_unaligned_u16_le(os->bitbuf >> ((os->bitcount - 48) &
+               put_unaligned_le16(os->bitbuf >> ((os->bitcount - 48) &
                                                shift_mask), os->next + 4);
        os->next += (os->bitcount >> 4) << 1;
        os->bitcount &= 15;
@@ -617,7 +617,7 @@ lzx_flush_output(struct lzx_output_bitstream *os)
                return 0;
 
        if (os->bitcount != 0) {
-               put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), os->next);
+               put_unaligned_le16(os->bitbuf << (16 - os->bitcount), os->next);
                os->next += 2;
        }
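Both flush paths build the LZX bitstream out of 16-bit little-endian coding units: lzx_add_bits() accumulates codewords in a machine-word bit buffer, lzx_flush_bits() emits the completed 16-bit units, and lzx_flush_output() pads whatever is left at the end. A minimal sketch of the same pattern (not wimlib's implementation; the struct and helpers here are simplified stand-ins):

	#include <stdint.h>

	struct bitstream {
		uint64_t bitbuf;	/* pending bits live in the low 'bitcount' bits */
		unsigned bitcount;
		uint8_t *next;		/* where the next 16-bit unit is written */
	};

	static void
	add_bits(struct bitstream *os, uint32_t bits, unsigned n)
	{
		os->bitbuf = (os->bitbuf << n) | bits;
		os->bitcount += n;
	}

	static void
	flush_bits(struct bitstream *os)
	{
		while (os->bitcount >= 16) {
			uint16_t unit = (uint16_t)(os->bitbuf >> (os->bitcount - 16));
			os->next[0] = (uint8_t)unit;		/* little-endian unit */
			os->next[1] = (uint8_t)(unit >> 8);
			os->next += 2;
			os->bitcount -= 16;
		}
	}

The real code avoids the loop: it unconditionally stores up to three units, then advances 'next' by (bitcount >> 4) << 1 and masks bitcount, as in the hunk above.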
 
@@ -893,27 +893,24 @@ lzx_write_sequences(struct lzx_output_bitstream *os, int block_type,
 
                        /* Verify optimization is enabled on 64-bit  */
                        STATIC_ASSERT(sizeof(machine_word_t) < 8 ||
-                                     CAN_BUFFER(4 * MAIN_CODEWORD_LIMIT));
+                                     CAN_BUFFER(3 * MAIN_CODEWORD_LIMIT));
 
-                       if (CAN_BUFFER(4 * MAIN_CODEWORD_LIMIT)) {
+                       if (CAN_BUFFER(3 * MAIN_CODEWORD_LIMIT)) {
 
-                               /* 64-bit: write 4 literals at a time.  */
-                               while (litrunlen >= 4) {
+                               /* 64-bit: write 3 literals at a time.  */
+                               while (litrunlen >= 3) {
                                        unsigned lit0 = block_data[0];
                                        unsigned lit1 = block_data[1];
                                        unsigned lit2 = block_data[2];
-                                       unsigned lit3 = block_data[3];
                                        lzx_add_bits(os, codes->codewords.main[lit0],
                                                     codes->lens.main[lit0]);
                                        lzx_add_bits(os, codes->codewords.main[lit1],
                                                     codes->lens.main[lit1]);
                                        lzx_add_bits(os, codes->codewords.main[lit2],
                                                     codes->lens.main[lit2]);
-                                       lzx_add_bits(os, codes->codewords.main[lit3],
-                                                    codes->lens.main[lit3]);
-                                       lzx_flush_bits(os, 4 * MAIN_CODEWORD_LIMIT);
-                                       block_data += 4;
-                                       litrunlen -= 4;
+                                       lzx_flush_bits(os, 3 * MAIN_CODEWORD_LIMIT);
+                                       block_data += 3;
+                                       litrunlen -= 3;
                                }
                                if (litrunlen--) {
                                        unsigned lit = *block_data++;
@@ -923,14 +920,7 @@ lzx_write_sequences(struct lzx_output_bitstream *os, int block_type,
                                                unsigned lit = *block_data++;
                                                lzx_add_bits(os, codes->codewords.main[lit],
                                                             codes->lens.main[lit]);
-                                               if (litrunlen--) {
-                                                       unsigned lit = *block_data++;
-                                                       lzx_add_bits(os, codes->codewords.main[lit],
-                                                                    codes->lens.main[lit]);
-                                                       lzx_flush_bits(os, 3 * MAIN_CODEWORD_LIMIT);
-                                               } else {
-                                                       lzx_flush_bits(os, 2 * MAIN_CODEWORD_LIMIT);
-                                               }
+                                               lzx_flush_bits(os, 2 * MAIN_CODEWORD_LIMIT);
                                        } else {
                                                lzx_flush_bits(os, 1 * MAIN_CODEWORD_LIMIT);
                                        }
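With the main loop now consuming three literals per iteration, at most two literals remain when it exits, which is why the innermost nested branch could be dropped: a single flush of at most 2 * MAIN_CODEWORD_LIMIT bits covers the tail. An equivalent way to write that tail handling (illustrative only, not the committed code) would be:

	/* 0, 1 or 2 literals remain at this point. */
	unsigned tail = litrunlen;
	for (unsigned j = 0; j < tail; j++) {
		unsigned lit = block_data[j];
		lzx_add_bits(os, codes->codewords.main[lit], codes->lens.main[lit]);
	}
	if (tail)
		lzx_flush_bits(os, tail * MAIN_CODEWORD_LIMIT);
	block_data += tail;
	litrunlen = 0;

The committed version keeps the nested ifs so that lzx_flush_bits() is always called with a compile-time constant bound.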
@@ -1678,8 +1668,8 @@ lzx_set_default_costs(struct lzx_compressor *c, const u8 *block, u32 block_size)
        bool have_byte[256];
        unsigned num_used_bytes;
 
-       /* The costs below are hard coded to use a scaling factor of 16.  */
-       STATIC_ASSERT(LZX_BIT_COST == 16);
+       /* The costs below are hard coded to use a scaling factor of 64.  */
+       STATIC_ASSERT(LZX_BIT_COST == 64);
 
        /*
         * Heuristics:
@@ -1704,13 +1694,13 @@ lzx_set_default_costs(struct lzx_compressor *c, const u8 *block, u32 block_size)
                num_used_bytes += have_byte[i];
 
        for (i = 0; i < 256; i++)
-               c->costs.main[i] = 140 - (256 - num_used_bytes) / 4;
+               c->costs.main[i] = 560 - (256 - num_used_bytes);
 
        for (; i < c->num_main_syms; i++)
-               c->costs.main[i] = 170;
+               c->costs.main[i] = 680;
 
        for (i = 0; i < LZX_LENCODE_NUM_SYMBOLS; i++)
-               c->costs.len[i] = 103 + (i / 4);
+               c->costs.len[i] = 412 + i;
 
 #if LZX_CONSIDER_ALIGNED_COSTS
        for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++)
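Dividing the new constants by the scaling factor of 64 shows they encode roughly the same heuristics as the old values did at scale 16:

	/* Rough conversion of the new defaults back to whole bits (scale = 64):
	 *   literals:       (560 - (256 - num_used_bytes)) / 64.0
	 *                   = 8.75 bits when all 256 byte values occur,
	 *                   ~= 4.8 bits when only one value occurs;
	 *   match headers:  680 / 64.0 = 10.625 bits;
	 *   length symbols: (412 + i) / 64.0 ~= 6.44 bits for i = 0,
	 *                   rising by 1/64 bit per length symbol. */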
@@ -1795,11 +1785,11 @@ lzx_optimize_and_write_block(struct lzx_compressor * const restrict c,
  * simpler "greedy" or "lazy" parse while still being relatively fast.
  */
 static inline void
-lzx_compress_near_optimal(struct lzx_compressor *c,
-                         struct lzx_output_bitstream *os,
+lzx_compress_near_optimal(struct lzx_compressor * restrict c,
+                         const u8 * const restrict in_begin,
+                         struct lzx_output_bitstream * restrict os,
                          bool is_16_bit)
 {
-       const u8 * const in_begin = c->in_buffer;
        const u8 *       in_next = in_begin;
        const u8 * const in_end  = in_begin + c->in_nbytes;
        u32 max_len = LZX_MAX_MATCH_LEN;
@@ -1883,7 +1873,6 @@ lzx_compress_near_optimal(struct lzx_compressor *c,
                                                   bt_matchfinder_skip_position,
                                                   in_begin,
                                                   in_next - in_begin,
-                                                  max_len,
                                                   nice_len,
                                                   c->max_search_depth,
                                                   next_hashes);
@@ -1908,14 +1897,14 @@ static void
 lzx_compress_near_optimal_16(struct lzx_compressor *c,
                             struct lzx_output_bitstream *os)
 {
-       lzx_compress_near_optimal(c, os, true);
+       lzx_compress_near_optimal(c, c->in_buffer, os, true);
 }
 
 static void
 lzx_compress_near_optimal_32(struct lzx_compressor *c,
                             struct lzx_output_bitstream *os)
 {
-       lzx_compress_near_optimal(c, os, false);
+       lzx_compress_near_optimal(c, c->in_buffer, os, false);
 }
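The point of threading in_begin through as a separate 'restrict' parameter, rather than reloading c->in_buffer inside the function, is to promise the compiler that the input data does not alias the compressor state, so in_begin and pointers derived from it can stay in registers across stores through 'c'. A toy illustration of the general idea (not wimlib code):

	struct state { unsigned pos; };

	static unsigned
	count_zero_bytes(struct state * restrict st,
			 const unsigned char * restrict in, unsigned len)
	{
		unsigned n = 0;
		for (unsigned i = 0; i < len; i++) {
			st->pos++;		/* store through 'st'... */
			n += (in[i] == 0);	/* ...cannot clobber the cached 'in[i]' */
		}
		return n;
	}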
 
 /*