bt_matchfinder: make callers do max_len check
authorEric Biggers <ebiggers3@gmail.com>
Sat, 19 Sep 2015 18:56:14 +0000 (13:56 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Sun, 27 Sep 2015 14:41:28 +0000 (09:41 -0500)
include/wimlib/bt_matchfinder.h
src/lzx_compress.c
src/xpress_compress.c

index c2fa8b8297b1a94deaed2a6aef1e9679587aaadb..1a94497845cd0a4d34380c8d7f69ba812f7e08a2 100644 (file)
@@ -147,11 +147,6 @@ TEMPLATED(bt_matchfinder_advance_one_byte)(struct TEMPLATED(bt_matchfinder) * co
        u32 len;
        u32 best_len = 2;
 
-       if (unlikely(max_len < LOAD_U24_REQUIRED_NBYTES + 1)) {
-               *best_len_ret = best_len;
-               return lz_matchptr;
-       }
-
        hash3 = *next_hash;
        *next_hash = lz_hash(load_u24_unaligned(in_next + 1), BT_MATCHFINDER_HASH3_ORDER);
        prefetchw(&mf->hash3_tab[*next_hash]);
@@ -246,7 +241,7 @@ TEMPLATED(bt_matchfinder_advance_one_byte)(struct TEMPLATED(bt_matchfinder) * co
  *     The current position in the input buffer (the position of the sequence
  *     being matched against).
  * @max_len
- *     The maximum permissible match length at this position.
+ *     The maximum permissible match length at this position.  Must be >= 5.
  * @nice_len
  *     Stop searching if a match of at least this length is found.
  *     Must be <= @max_len.
@@ -305,7 +300,7 @@ TEMPLATED(bt_matchfinder_get_matches)(struct TEMPLATED(bt_matchfinder) *mf,
  * @cur_pos
  *     The current position in the input buffer.
  * @max_len
- *     The maximum permissible match length at this position.
+ *     The maximum permissible match length at this position.  Must be >= 5.
  * @nice_len
  *     Stop searching if a match of at least this length is found.
  * @max_search_depth
index 3e19bd191018ef11f28c7be61debfaf4a80181e2..d36f8aaa380ae747723f8e0f475862464af5145b 100644 (file)
@@ -1779,8 +1779,8 @@ lzx_compress_near_optimal(struct lzx_compressor *c,
        const u8 * const in_begin = c->in_buffer;
        const u8 *       in_next = in_begin;
        const u8 * const in_end  = in_begin + c->in_nbytes;
-       unsigned max_len = LZX_MAX_MATCH_LEN;
-       unsigned nice_len = min(c->nice_match_length, max_len);
+       u32 max_len = LZX_MAX_MATCH_LEN;
+       u32 nice_len = min(c->nice_match_length, max_len);
        u32 next_hash = 0;
        struct lzx_lru_queue queue;
 
@@ -1797,20 +1797,14 @@ lzx_compress_near_optimal(struct lzx_compressor *c,
                struct lz_match *cache_ptr = c->match_cache;
                do {
                        struct lz_match *lz_matchptr;
-                       unsigned best_len;
+                       u32 best_len;
 
                        /* If approaching the end of the input buffer, adjust
                         * 'max_len' and 'nice_len' accordingly.  */
                        if (unlikely(max_len > in_end - in_next)) {
                                max_len = in_end - in_next;
                                nice_len = min(max_len, nice_len);
-
-                               /* This extra check is needed to ensure that we
-                                * never output a length 2 match of the very
-                                * last two bytes with the very first two bytes,
-                                * since such a match has an offset too large to
-                                * be represented.  */
-                               if (unlikely(max_len < 3)) {
+                               if (unlikely(max_len < 5)) {
                                        in_next++;
                                        cache_ptr->length = 0;
                                        cache_ptr++;
@@ -1850,7 +1844,7 @@ lzx_compress_near_optimal(struct lzx_compressor *c,
                                        if (unlikely(max_len > in_end - in_next)) {
                                                max_len = in_end - in_next;
                                                nice_len = min(max_len, nice_len);
-                                               if (unlikely(max_len < 3)) {
+                                               if (unlikely(max_len < 5)) {
                                                        in_next++;
                                                        cache_ptr->length = 0;
                                                        cache_ptr++;
index bce0901f523665b8b95aa1ae73864d1e8ccfc600..ba7a9af7272ecb3510d6a6ab04e7fec0319dd3cd 100644 (file)
@@ -906,27 +906,22 @@ xpress_find_matches(struct xpress_compressor * restrict c,
 {
        const u8 * const in_begin = in;
        const u8 *in_next = in_begin;
-       const u8 * const in_end = in_begin + in_nbytes;
        struct lz_match *cache_ptr = c->match_cache;
        u32 next_hash = 0;
+       u32 max_len = in_nbytes;
+       u32 nice_len = min(max_len, c->nice_match_length);
 
        bt_matchfinder_init(&c->bt_mf);
 
-       do {
+       for (;;) {
                struct lz_match *matches;
-               unsigned best_len;
+               u32 best_len;
 
                /* If we've found so many matches that the cache might overflow
                 * if we keep finding more, then stop finding matches.  This
                 * case is very unlikely.  */
-               if (unlikely(cache_ptr >= c->cache_overflow_mark)) {
-                       do {
-                               cache_ptr->length = 0;
-                               cache_ptr->offset = *in_next++;
-                               cache_ptr++;
-                       } while (in_next != in_end);
-                       return cache_ptr;
-               }
+               if (unlikely(cache_ptr >= c->cache_overflow_mark || max_len < 5))
+                       break;
 
                matches = cache_ptr;
 
@@ -937,16 +932,17 @@ xpress_find_matches(struct xpress_compressor * restrict c,
                        bt_matchfinder_get_matches(&c->bt_mf,
                                                   in_begin,
                                                   in_next - in_begin,
-                                                  in_end - in_next,
-                                                  min(in_end - in_next, c->nice_match_length),
+                                                  max_len,
+                                                  nice_len,
                                                   c->max_search_depth,
                                                   &next_hash,
                                                   &best_len,
                                                   cache_ptr);
                cache_ptr->length = cache_ptr - matches;
-               cache_ptr->offset = *in_next;
-               in_next++;
+               cache_ptr->offset = *in_next++;
                cache_ptr++;
+               max_len--;
+               nice_len = min(nice_len, max_len);
 
                /*
                 * If there was a very long match found, then don't cache any
@@ -958,24 +954,32 @@ xpress_find_matches(struct xpress_compressor * restrict c,
                 * very much.  If there's a long match, then the data must be
                 * highly compressible, so it doesn't matter as much what we do.
                 */
-               if (best_len >= c->nice_match_length) {
+               if (best_len >= nice_len) {
+                       if (unlikely(best_len + 5 >= max_len))
+                               break;
                        --best_len;
                        do {
                                bt_matchfinder_skip_position(&c->bt_mf,
                                                             in_begin,
                                                             in_next - in_begin,
-                                                            in_end - in_next,
-                                                            min(in_end - in_next,
-                                                                c->nice_match_length),
+                                                            max_len,
+                                                            nice_len,
                                                             c->max_search_depth,
                                                             &next_hash);
-
                                cache_ptr->length = 0;
                                cache_ptr->offset = *in_next++;
                                cache_ptr++;
+                               max_len--;
+                               nice_len = min(nice_len, max_len);
                        } while (--best_len);
                }
-       } while (in_next != in_end);
+       }
+
+       while (max_len--) {
+               cache_ptr->length = 0;
+               cache_ptr->offset = *in_next++;
+               cache_ptr++;
+       }
 
        return cache_ptr;
 }