From 5ec910e4e9126a37eed1ff199d55a1952c76e0f7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 19 Sep 2015 13:56:14 -0500 Subject: [PATCH] bt_matchfinder: make callers do max_len check --- include/wimlib/bt_matchfinder.h | 9 ++----- src/lzx_compress.c | 16 ++++-------- src/xpress_compress.c | 46 ++++++++++++++++++--------------- 3 files changed, 32 insertions(+), 39 deletions(-) diff --git a/include/wimlib/bt_matchfinder.h b/include/wimlib/bt_matchfinder.h index c2fa8b82..1a944978 100644 --- a/include/wimlib/bt_matchfinder.h +++ b/include/wimlib/bt_matchfinder.h @@ -147,11 +147,6 @@ TEMPLATED(bt_matchfinder_advance_one_byte)(struct TEMPLATED(bt_matchfinder) * co u32 len; u32 best_len = 2; - if (unlikely(max_len < LOAD_U24_REQUIRED_NBYTES + 1)) { - *best_len_ret = best_len; - return lz_matchptr; - } - hash3 = *next_hash; *next_hash = lz_hash(load_u24_unaligned(in_next + 1), BT_MATCHFINDER_HASH3_ORDER); prefetchw(&mf->hash3_tab[*next_hash]); @@ -246,7 +241,7 @@ TEMPLATED(bt_matchfinder_advance_one_byte)(struct TEMPLATED(bt_matchfinder) * co * The current position in the input buffer (the position of the sequence * being matched against). * @max_len - * The maximum permissible match length at this position. + * The maximum permissible match length at this position. Must be >= 5. * @nice_len * Stop searching if a match of at least this length is found. * Must be <= @max_len. @@ -305,7 +300,7 @@ TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) *mf, * @cur_pos * The current position in the input buffer. * @max_len - * The maximum permissible match length at this position. + * The maximum permissible match length at this position. Must be >= 5. * @nice_len * Stop searching if a match of at least this length is found. * @max_search_depth diff --git a/src/lzx_compress.c b/src/lzx_compress.c index 3e19bd19..d36f8aaa 100644 --- a/src/lzx_compress.c +++ b/src/lzx_compress.c @@ -1779,8 +1779,8 @@ lzx_compress_near_optimal(struct lzx_compressor *c, const u8 * const in_begin = c->in_buffer; const u8 * in_next = in_begin; const u8 * const in_end = in_begin + c->in_nbytes; - unsigned max_len = LZX_MAX_MATCH_LEN; - unsigned nice_len = min(c->nice_match_length, max_len); + u32 max_len = LZX_MAX_MATCH_LEN; + u32 nice_len = min(c->nice_match_length, max_len); u32 next_hash = 0; struct lzx_lru_queue queue; @@ -1797,20 +1797,14 @@ lzx_compress_near_optimal(struct lzx_compressor *c, struct lz_match *cache_ptr = c->match_cache; do { struct lz_match *lz_matchptr; - unsigned best_len; + u32 best_len; /* If approaching the end of the input buffer, adjust * 'max_len' and 'nice_len' accordingly. */ if (unlikely(max_len > in_end - in_next)) { max_len = in_end - in_next; nice_len = min(max_len, nice_len); - - /* This extra check is needed to ensure that we - * never output a length 2 match of the very - * last two bytes with the very first two bytes, - * since such a match has an offset too large to - * be represented. */ - if (unlikely(max_len < 3)) { + if (unlikely(max_len < 5)) { in_next++; cache_ptr->length = 0; cache_ptr++; @@ -1850,7 +1844,7 @@ lzx_compress_near_optimal(struct lzx_compressor *c, if (unlikely(max_len > in_end - in_next)) { max_len = in_end - in_next; nice_len = min(max_len, nice_len); - if (unlikely(max_len < 3)) { + if (unlikely(max_len < 5)) { in_next++; cache_ptr->length = 0; cache_ptr++; diff --git a/src/xpress_compress.c b/src/xpress_compress.c index bce0901f..ba7a9af7 100644 --- a/src/xpress_compress.c +++ b/src/xpress_compress.c @@ -906,27 +906,22 @@ xpress_find_matches(struct xpress_compressor * restrict c, { const u8 * const in_begin = in; const u8 *in_next = in_begin; - const u8 * const in_end = in_begin + in_nbytes; struct lz_match *cache_ptr = c->match_cache; u32 next_hash = 0; + u32 max_len = in_nbytes; + u32 nice_len = min(max_len, c->nice_match_length); bt_matchfinder_init(&c->bt_mf); - do { + for (;;) { struct lz_match *matches; - unsigned best_len; + u32 best_len; /* If we've found so many matches that the cache might overflow * if we keep finding more, then stop finding matches. This * case is very unlikely. */ - if (unlikely(cache_ptr >= c->cache_overflow_mark)) { - do { - cache_ptr->length = 0; - cache_ptr->offset = *in_next++; - cache_ptr++; - } while (in_next != in_end); - return cache_ptr; - } + if (unlikely(cache_ptr >= c->cache_overflow_mark || max_len < 5)) + break; matches = cache_ptr; @@ -937,16 +932,17 @@ xpress_find_matches(struct xpress_compressor * restrict c, bt_matchfinder_get_matches(&c->bt_mf, in_begin, in_next - in_begin, - in_end - in_next, - min(in_end - in_next, c->nice_match_length), + max_len, + nice_len, c->max_search_depth, &next_hash, &best_len, cache_ptr); cache_ptr->length = cache_ptr - matches; - cache_ptr->offset = *in_next; - in_next++; + cache_ptr->offset = *in_next++; cache_ptr++; + max_len--; + nice_len = min(nice_len, max_len); /* * If there was a very long match found, then don't cache any @@ -958,24 +954,32 @@ xpress_find_matches(struct xpress_compressor * restrict c, * very much. If there's a long match, then the data must be * highly compressible, so it doesn't matter as much what we do. */ - if (best_len >= c->nice_match_length) { + if (best_len >= nice_len) { + if (unlikely(best_len + 5 >= max_len)) + break; --best_len; do { bt_matchfinder_skip_position(&c->bt_mf, in_begin, in_next - in_begin, - in_end - in_next, - min(in_end - in_next, - c->nice_match_length), + max_len, + nice_len, c->max_search_depth, &next_hash); - cache_ptr->length = 0; cache_ptr->offset = *in_next++; cache_ptr++; + max_len--; + nice_len = min(nice_len, max_len); } while (--best_len); } - } while (in_next != in_end); + } + + while (max_len--) { + cache_ptr->length = 0; + cache_ptr->offset = *in_next++; + cache_ptr++; + } return cache_ptr; } -- 2.43.0