X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Fxpress_compress.c;h=1b430912de7521d5b2abb622f6b1a694f7ab6281;hb=4a20aae0dd8469a352517a0b107416ffa99ccc55;hp=ba7a9af7272ecb3510d6a6ab04e7fec0319dd3cd;hpb=5ec910e4e9126a37eed1ff199d55a1952c76e0f7;p=wimlib diff --git a/src/xpress_compress.c b/src/xpress_compress.c index ba7a9af7..1b430912 100644 --- a/src/xpress_compress.c +++ b/src/xpress_compress.c @@ -212,7 +212,7 @@ struct xpress_output_bitstream { /* Pointer to the start of the output buffer. */ u8 *start; - /* Pointer to the location in the ouput buffer at which to write the + /* Pointer to the location in the output buffer at which to write the * next 16 bits. */ u8 *next_bits; @@ -279,7 +279,7 @@ xpress_init_output(struct xpress_output_bitstream *os, void *buffer, size_t size * If the output buffer space is exhausted, then the bits will be ignored, and * xpress_flush_output() will return 0 when it gets called. */ -static inline void +static forceinline void xpress_write_bits(struct xpress_output_bitstream *os, const u32 bits, const unsigned num_bits) { @@ -292,7 +292,7 @@ xpress_write_bits(struct xpress_output_bitstream *os, if (os->bitcount > 16) { os->bitcount -= 16; if (os->end - os->next_byte >= 2) { - put_unaligned_u16_le(os->bitbuf >> os->bitcount, os->next_bits); + put_unaligned_le16(os->bitbuf >> os->bitcount, os->next_bits); os->next_bits = os->next_bits2; os->next_bits2 = os->next_byte; os->next_byte += 2; @@ -303,7 +303,7 @@ xpress_write_bits(struct xpress_output_bitstream *os, /* * Interweave a literal byte into the output bitstream. */ -static inline void +static forceinline void xpress_write_byte(struct xpress_output_bitstream *os, u8 byte) { if (os->next_byte < os->end) @@ -313,11 +313,11 @@ xpress_write_byte(struct xpress_output_bitstream *os, u8 byte) /* * Interweave two literal bytes into the output bitstream. */ -static inline void +static forceinline void xpress_write_u16(struct xpress_output_bitstream *os, u16 v) { if (os->end - os->next_byte >= 2) { - put_unaligned_u16_le(v, os->next_byte); + put_unaligned_le16(v, os->next_byte); os->next_byte += 2; } } @@ -332,13 +332,13 @@ xpress_flush_output(struct xpress_output_bitstream *os) if (os->end - os->next_byte < 2) return 0; - put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), os->next_bits); - put_unaligned_u16_le(0, os->next_bits2); + put_unaligned_le16(os->bitbuf << (16 - os->bitcount), os->next_bits); + put_unaligned_le16(0, os->next_bits2); return os->next_byte - os->start; } -static inline void +static forceinline void xpress_write_extra_length_bytes(struct xpress_output_bitstream *os, unsigned adjusted_len) { @@ -353,7 +353,7 @@ xpress_write_extra_length_bytes(struct xpress_output_bitstream *os, } /* Output a match or literal. */ -static inline void +static forceinline void xpress_write_item(struct xpress_item item, struct xpress_output_bitstream *os, const u32 codewords[], const u8 lens[]) { @@ -413,7 +413,7 @@ xpress_write_item_list(struct xpress_output_bitstream *os, unsigned sym; adjusted_len = length - XPRESS_MIN_MATCH_LEN; - log2_offset = fls32(offset); + log2_offset = bsr32(offset); len_hdr = min(0xF, adjusted_len); sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr); @@ -484,7 +484,7 @@ xpress_write(struct xpress_compressor *c, void *out, size_t out_nbytes_avail, /* Tally the Huffman symbol for a literal and return the intermediate * representation of that literal. */ -static inline struct xpress_item +static forceinline struct xpress_item xpress_record_literal(struct xpress_compressor *c, unsigned literal) { c->freqs[literal]++; @@ -496,12 +496,12 @@ xpress_record_literal(struct xpress_compressor *c, unsigned literal) /* Tally the Huffman symbol for a match and return the intermediate * representation of that match. */ -static inline struct xpress_item +static forceinline struct xpress_item xpress_record_match(struct xpress_compressor *c, unsigned length, unsigned offset) { unsigned adjusted_len = length - XPRESS_MIN_MATCH_LEN; unsigned len_hdr = min(adjusted_len, 0xF); - unsigned log2_offset = fls32(offset); + unsigned log2_offset = bsr32(offset); unsigned sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr); c->freqs[sym]++; @@ -755,7 +755,7 @@ xpress_tally_item_list(struct xpress_compressor *c, unsigned sym; adjusted_len = length - XPRESS_MIN_MATCH_LEN; - log2_offset = fls32(offset); + log2_offset = bsr32(offset); len_hdr = min(0xF, adjusted_len); sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr); @@ -831,7 +831,7 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes, u32 offset_cost; offset = match->offset; - log2_offset = fls32(offset); + log2_offset = bsr32(offset); offset_cost = log2_offset; do { unsigned len_hdr; @@ -860,7 +860,7 @@ xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes, u32 offset_cost; offset = match->offset; - log2_offset = fls32(offset); + log2_offset = bsr32(offset); offset_cost = log2_offset; do { unsigned adjusted_len; @@ -907,7 +907,7 @@ xpress_find_matches(struct xpress_compressor * restrict c, const u8 * const in_begin = in; const u8 *in_next = in_begin; struct lz_match *cache_ptr = c->match_cache; - u32 next_hash = 0; + u32 next_hashes[2] = {}; u32 max_len = in_nbytes; u32 nice_len = min(max_len, c->nice_match_length); @@ -920,7 +920,8 @@ xpress_find_matches(struct xpress_compressor * restrict c, /* If we've found so many matches that the cache might overflow * if we keep finding more, then stop finding matches. This * case is very unlikely. */ - if (unlikely(cache_ptr >= c->cache_overflow_mark || max_len < 5)) + if (unlikely(cache_ptr >= c->cache_overflow_mark || + max_len < BT_MATCHFINDER_REQUIRED_NBYTES)) break; matches = cache_ptr; @@ -935,7 +936,7 @@ xpress_find_matches(struct xpress_compressor * restrict c, max_len, nice_len, c->max_search_depth, - &next_hash, + next_hashes, &best_len, cache_ptr); cache_ptr->length = cache_ptr - matches; @@ -955,17 +956,17 @@ xpress_find_matches(struct xpress_compressor * restrict c, * highly compressible, so it doesn't matter as much what we do. */ if (best_len >= nice_len) { - if (unlikely(best_len + 5 >= max_len)) + if (unlikely(best_len + + BT_MATCHFINDER_REQUIRED_NBYTES >= max_len)) break; --best_len; do { bt_matchfinder_skip_position(&c->bt_mf, in_begin, in_next - in_begin, - max_len, nice_len, c->max_search_depth, - &next_hash); + next_hashes); cache_ptr->length = 0; cache_ptr->offset = *in_next++; cache_ptr++; @@ -1090,12 +1091,12 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level, if (compression_level < 30) { c->impl = xpress_compress_greedy; - c->max_search_depth = (compression_level * 24) / 16; - c->nice_match_length = (compression_level * 48) / 16; + c->max_search_depth = (compression_level * 30) / 16; + c->nice_match_length = (compression_level * 60) / 16; } else { c->impl = xpress_compress_lazy; - c->max_search_depth = (compression_level * 24) / 32; - c->nice_match_length = (compression_level * 48) / 32; + c->max_search_depth = (compression_level * 30) / 32; + c->nice_match_length = (compression_level * 60) / 32; /* xpress_compress_lazy() needs max_search_depth >= 2 * because it halves the max_search_depth when @@ -1122,8 +1123,8 @@ xpress_create_compressor(size_t max_bufsize, unsigned compression_level, &c->match_cache[max_bufsize * CACHE_RESERVE_PER_POS]; c->impl = xpress_compress_near_optimal; - c->max_search_depth = (compression_level * 32) / 100; - c->nice_match_length = (compression_level * 50) / 100; + c->max_search_depth = (compression_level * 28) / 100; + c->nice_match_length = (compression_level * 56) / 100; c->num_optim_passes = compression_level / 40; } #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */