wimlib.net Git - wimlib/blobdiff - src/xpress_compress.c
Stop force-inlining everything marked 'inline'
[wimlib] / src / xpress_compress.c
index 776de7cdc4c33032f82a75d7d4333f041df36217..1b430912de7521d5b2abb622f6b1a694f7ab6281 100644 (file)
@@ -212,7 +212,7 @@ struct xpress_output_bitstream {
        /* Pointer to the start of the output buffer.  */
        u8 *start;
 
-       /* Pointer to the location in the ouput buffer at which to write the
+       /* Pointer to the location in the output buffer at which to write the
         * next 16 bits.  */
        u8 *next_bits;
 
@@ -279,7 +279,7 @@ xpress_init_output(struct xpress_output_bitstream *os, void *buffer, size_t size
  * If the output buffer space is exhausted, then the bits will be ignored, and
  * xpress_flush_output() will return 0 when it gets called.
  */
-static inline void
+static forceinline void
 xpress_write_bits(struct xpress_output_bitstream *os,
                  const u32 bits, const unsigned num_bits)
 {
@@ -292,7 +292,7 @@ xpress_write_bits(struct xpress_output_bitstream *os,
        if (os->bitcount > 16) {
                os->bitcount -= 16;
                if (os->end - os->next_byte >= 2) {
-                       put_unaligned_u16_le(os->bitbuf >> os->bitcount, os->next_bits);
+                       put_unaligned_le16(os->bitbuf >> os->bitcount, os->next_bits);
                        os->next_bits = os->next_bits2;
                        os->next_bits2 = os->next_byte;
                        os->next_byte += 2;
@@ -303,7 +303,7 @@ xpress_write_bits(struct xpress_output_bitstream *os,
 /*
  * Interweave a literal byte into the output bitstream.
  */
-static inline void
+static forceinline void
 xpress_write_byte(struct xpress_output_bitstream *os, u8 byte)
 {
        if (os->next_byte < os->end)
@@ -313,11 +313,11 @@ xpress_write_byte(struct xpress_output_bitstream *os, u8 byte)
 /*
  * Interweave two literal bytes into the output bitstream.
  */
-static inline void
+static forceinline void
 xpress_write_u16(struct xpress_output_bitstream *os, u16 v)
 {
        if (os->end - os->next_byte >= 2) {
-               put_unaligned_u16_le(v, os->next_byte);
+               put_unaligned_le16(v, os->next_byte);
                os->next_byte += 2;
        }
 }
@@ -332,13 +332,13 @@ xpress_flush_output(struct xpress_output_bitstream *os)
        if (os->end - os->next_byte < 2)
                return 0;
 
-       put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), os->next_bits);
-       put_unaligned_u16_le(0, os->next_bits2);
+       put_unaligned_le16(os->bitbuf << (16 - os->bitcount), os->next_bits);
+       put_unaligned_le16(0, os->next_bits2);
 
        return os->next_byte - os->start;
 }
 
-static inline void
+static forceinline void
 xpress_write_extra_length_bytes(struct xpress_output_bitstream *os,
                                unsigned adjusted_len)
 {
@@ -353,7 +353,7 @@ xpress_write_extra_length_bytes(struct xpress_output_bitstream *os,
 }
 
 /* Output a match or literal.  */
-static inline void
+static forceinline void
 xpress_write_item(struct xpress_item item, struct xpress_output_bitstream *os,
                  const u32 codewords[], const u8 lens[])
 {
@@ -413,7 +413,7 @@ xpress_write_item_list(struct xpress_output_bitstream *os,
                        unsigned sym;
 
                        adjusted_len = length - XPRESS_MIN_MATCH_LEN;
-                       log2_offset = fls32(offset);
+                       log2_offset = bsr32(offset);
                        len_hdr = min(0xF, adjusted_len);
                        sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
 
@@ -484,7 +484,7 @@ xpress_write(struct xpress_compressor *c, void *out, size_t out_nbytes_avail,
 
 /* Tally the Huffman symbol for a literal and return the intermediate
  * representation of that literal.  */
-static inline struct xpress_item
+static forceinline struct xpress_item
 xpress_record_literal(struct xpress_compressor *c, unsigned literal)
 {
        c->freqs[literal]++;
@@ -496,12 +496,12 @@ xpress_record_literal(struct xpress_compressor *c, unsigned literal)
 
 /* Tally the Huffman symbol for a match and return the intermediate
  * representation of that match.  */
-static inline struct xpress_item
+static forceinline struct xpress_item
 xpress_record_match(struct xpress_compressor *c, unsigned length, unsigned offset)
 {
        unsigned adjusted_len = length - XPRESS_MIN_MATCH_LEN;
        unsigned len_hdr = min(adjusted_len, 0xF);
-       unsigned log2_offset = fls32(offset);
+       unsigned log2_offset = bsr32(offset);
        unsigned sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
 
        c->freqs[sym]++;
@@ -755,7 +755,7 @@ xpress_tally_item_list(struct xpress_compressor *c,
                        unsigned sym;
 
                        adjusted_len = length - XPRESS_MIN_MATCH_LEN;
-                       log2_offset = fls32(offset);
+                       log2_offset = bsr32(offset);
                        len_hdr = min(0xF, adjusted_len);
                        sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
 
@@ -831,7 +831,7 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                                u32 offset_cost;
 
                                offset = match->offset;
-                               log2_offset = fls32(offset);
+                               log2_offset = bsr32(offset);
                                offset_cost = log2_offset;
                                do {
                                        unsigned len_hdr;
@@ -860,7 +860,7 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
                                u32 offset_cost;
 
                                offset = match->offset;
-                               log2_offset = fls32(offset);
+                               log2_offset = bsr32(offset);
                                offset_cost = log2_offset;
                                do {
                                        unsigned adjusted_len;
@@ -920,7 +920,8 @@ xpress_find_matches(struct xpress_compressor * restrict c,
                /* If we've found so many matches that the cache might overflow
                 * if we keep finding more, then stop finding matches.  This
                 * case is very unlikely.  */
-               if (unlikely(cache_ptr >= c->cache_overflow_mark || max_len < 5))
+               if (unlikely(cache_ptr >= c->cache_overflow_mark ||
+                            max_len < BT_MATCHFINDER_REQUIRED_NBYTES))
                        break;
 
                matches = cache_ptr;
@@ -955,14 +956,14 @@ xpress_find_matches(struct xpress_compressor * restrict c,
                 * highly compressible, so it doesn't matter as much what we do.
                 */
                if (best_len >= nice_len) {
-                       if (unlikely(best_len + 5 >= max_len))
+                       if (unlikely(best_len +
+                                    BT_MATCHFINDER_REQUIRED_NBYTES >= max_len))
                                break;
                        --best_len;
                        do {
                                bt_matchfinder_skip_position(&c->bt_mf,
                                                             in_begin,
                                                             in_next - in_begin,
-                                                            max_len,
                                                             nice_len,
                                                             c->max_search_depth,
                                                             next_hashes);
@@ -1090,12 +1091,12 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
 
                if (compression_level < 30) {
                        c->impl = xpress_compress_greedy;
-                       c->max_search_depth = (compression_level * 24) / 16;
-                       c->nice_match_length = (compression_level * 48) / 16;
+                       c->max_search_depth = (compression_level * 30) / 16;
+                       c->nice_match_length = (compression_level * 60) / 16;
                } else {
                        c->impl = xpress_compress_lazy;
-                       c->max_search_depth = (compression_level * 24) / 32;
-                       c->nice_match_length = (compression_level * 48) / 32;
+                       c->max_search_depth = (compression_level * 30) / 32;
+                       c->nice_match_length = (compression_level * 60) / 32;
 
                        /* xpress_compress_lazy() needs max_search_depth >= 2
                         * because it halves the max_search_depth when
@@ -1122,8 +1123,8 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
                        &c->match_cache[max_bufsize * CACHE_RESERVE_PER_POS];
 
                c->impl = xpress_compress_near_optimal;
-               c->max_search_depth = (compression_level * 32) / 100;
-               c->nice_match_length = (compression_level * 50) / 100;
+               c->max_search_depth = (compression_level * 28) / 100;
+               c->nice_match_length = (compression_level * 56) / 100;
                c->num_optim_passes = compression_level / 40;
        }
 #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */