X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;ds=sidebyside;f=src%2Flzx_compress.c;h=4434cde34a02bd13a8b1685765207c2d8b15bee1;hb=e7a3df0a6bf2af6500611f6c464dc36cab3332d8;hp=45bb019bb94341c1a6cf146239fffe8ff15ea943;hpb=3e8aa757aaa63297f0d54007adf46411778fb6a8;p=wimlib diff --git a/src/lzx_compress.c b/src/lzx_compress.c index 45bb019b..4434cde3 100644 --- a/src/lzx_compress.c +++ b/src/lzx_compress.c @@ -79,12 +79,28 @@ * LZX_CACHE_PER_POS is the number of lz_match structures to reserve in the * match cache for each byte position. This value should be high enough so that * nearly the time, all matches found in a given block can fit in the match - * cache. However, fallback behavior on cache overflow is still required. + * cache. However, fallback behavior (immediately terminating the block) on + * cache overflow is still required. */ #define LZX_CACHE_PER_POS 6 -#define LZX_CACHE_LEN (LZX_DIV_BLOCK_SIZE * (LZX_CACHE_PER_POS + 1)) +/* + * LZX_CACHE_LENGTH is the number of lz_match structures in the match cache, + * excluding the extra "overflow" entries. The per-position multiplier is '1 + + * LZX_CACHE_PER_POS' instead of 'LZX_CACHE_PER_POS' because there is an + * overhead of one lz_match per position, used to hold the match count at that + * position. + */ +#define LZX_CACHE_LENGTH (LZX_DIV_BLOCK_SIZE * (1 + LZX_CACHE_PER_POS)) +/* + * LZX_MAX_MATCHES_PER_POS is an upper bound on the number of matches that can + * ever be saved in the match cache for a single position. Since each match we + * save for a single position has a distinct length, we can use the number of + * possible match lengths in LZX as this bound. This bound is guaranteed to be + * valid in all cases, although if 'nice_match_length < LZX_MAX_MATCH_LEN', then + * it will never actually be reached. + */ #define LZX_MAX_MATCHES_PER_POS LZX_NUM_LENS /* @@ -361,9 +377,26 @@ struct lzx_compressor { struct lzx_codes codes[2]; unsigned codes_index; - /* The match/literal sequence the algorithm chose for the current block. + /* + * The match/literal sequence the algorithm chose for the current block. + * + * Notes on how large this array actually needs to be: + * + * - In lzx_compress_near_optimal(), the maximum block size is + * 'LZX_DIV_BLOCK_SIZE + LZX_MAX_MATCH_LEN - 1' bytes. This occurs if + * a match of the maximum length is found on the last byte. Although + * it is impossible for this particular case to actually result in a + * parse of all literals, we reserve this many spaces anyway. + * + * - The worst case for lzx_compress_lazy() is a block of almost all + * literals that ends with a series of matches of increasing scores, + * causing a sequence of literals to be chosen before the last match + * is finally chosen. The number of items actually chosen in this + * scenario is limited by the number of distinct match scores that + * exist for matches shorter than 'nice_match_length'. Having + * 'LZX_MAX_MATCH_LEN - 1' extra spaces is plenty for now. */ - struct lzx_item chosen_items[LZX_DIV_BLOCK_SIZE + LZX_MAX_MATCH_LEN + 1]; + struct lzx_item chosen_items[LZX_DIV_BLOCK_SIZE + LZX_MAX_MATCH_LEN - 1]; /* Table mapping match offset => offset slot for small offsets */ #define LZX_NUM_FAST_OFFSETS 32768 @@ -378,17 +411,55 @@ struct lzx_compressor { /* Data for near-optimal parsing */ struct { - /* The graph nodes for the current block */ + /* + * The graph nodes for the current block. + * + * We need at least 'LZX_DIV_BLOCK_SIZE + + * LZX_MAX_MATCH_LEN - 1' nodes because that is the + * maximum block size that may be used. Add 1 because + * we need a node to represent end-of-block. + * + * It is possible that nodes past end-of-block are + * accessed during match consideration, but this can + * only occur if the block was truncated at + * LZX_DIV_BLOCK_SIZE. So the same bound still applies. + * Note that since nodes past the end of the block will + * never actually have an effect on the items that are + * chosen for the block, it makes no difference what + * their costs are initialized to (if anything). + */ struct lzx_optimum_node optimum_nodes[LZX_DIV_BLOCK_SIZE + - LZX_MAX_MATCH_LEN + 1]; + LZX_MAX_MATCH_LEN - 1 + 1]; /* The cost model for the current block */ struct lzx_costs costs; - /* Cached matches for the current block */ - struct lz_match match_cache[LZX_CACHE_LEN + 1 + - LZX_MAX_MATCHES_PER_POS]; - struct lz_match *cache_overflow_mark; + /* + * Cached matches for the current block. This array + * contains the matches that were found at each position + * in the block. Specifically, for each position, there + * is a special 'struct lz_match' whose 'length' field + * contains the number of matches that were found at + * that position; this is followed by the matches + * themselves, if any, sorted by strictly increasing + * length and strictly increasing offset. + * + * Note: in rare cases, there will be a very high number + * of matches in the block and this array will overflow. + * If this happens, we force the end of the current + * block. LZX_CACHE_LENGTH is the length at which we + * actually check for overflow. The extra slots beyond + * this are enough to absorb the worst case overflow, + * which occurs if starting at + * &match_cache[LZX_CACHE_LENGTH - 1], we write the + * match count header, then write + * LZX_MAX_MATCHES_PER_POS matches, then skip searching + * for matches at 'LZX_MAX_MATCH_LEN - 1' positions and + * write the match count header for each. + */ + struct lz_match match_cache[LZX_CACHE_LENGTH + + LZX_MAX_MATCHES_PER_POS + + LZX_MAX_MATCH_LEN - 1]; /* Hash table for finding length 2 matches */ pos_t hash2_tab[LZX_HASH2_LENGTH] @@ -400,17 +471,6 @@ struct lzx_compressor { }; }; -/* Compute a hash value for the next 2 bytes of uncompressed data. */ -static inline u32 -lz_hash_2_bytes(const u8 *in_next) -{ - u16 next_2_bytes = load_u16_unaligned(in_next); - if (LZX_HASH2_ORDER == 16) - return next_2_bytes; - else - return lz_hash(next_2_bytes, LZX_HASH2_ORDER); -} - /* * Structure to keep track of the current state of sending bits to the * compressed output buffer. @@ -1561,9 +1621,7 @@ lzx_compress_near_optimal(struct lzx_compressor *c, * match of the very last two bytes with the * very first two bytes, since such a match has * an offset too large to be represented. */ - if (unlikely(max_len < - max(LZ_HASH_REQUIRED_NBYTES, 3))) - { + if (unlikely(max_len < 3)) { in_next++; cache_ptr->length = 0; cache_ptr++; @@ -1574,7 +1632,7 @@ lzx_compress_near_optimal(struct lzx_compressor *c, lz_matchptr = cache_ptr + 1; /* Check for a length 2 match. */ - hash2 = lz_hash_2_bytes(in_next); + hash2 = lz_hash_2_bytes(in_next, LZX_HASH2_ORDER); cur_match = c->hash2_tab[hash2]; c->hash2_tab[hash2] = in_next - in_begin; if (matchfinder_node_valid(cur_match) && @@ -1621,16 +1679,14 @@ lzx_compress_near_optimal(struct lzx_compressor *c, if (unlikely(max_len > in_end - in_next)) { max_len = in_end - in_next; nice_len = min(max_len, nice_len); - if (unlikely(max_len < - max(LZ_HASH_REQUIRED_NBYTES, 3))) - { + if (unlikely(max_len < 3)) { in_next++; cache_ptr->length = 0; cache_ptr++; continue; } } - c->hash2_tab[lz_hash_2_bytes(in_next)] = + c->hash2_tab[lz_hash_2_bytes(in_next, LZX_HASH2_ORDER)] = in_next - in_begin; bt_matchfinder_skip_position(&c->bt_mf, in_begin, @@ -1645,7 +1701,7 @@ lzx_compress_near_optimal(struct lzx_compressor *c, } while (--best_len); } } while (in_next < in_block_end && - likely(cache_ptr < c->cache_overflow_mark)); + likely(cache_ptr < &c->match_cache[LZX_CACHE_LENGTH])); /* We've finished running the block through the matchfinder. * Now choose a match/literal sequence and write the block. */ @@ -1991,9 +2047,13 @@ lzx_create_compressor(size_t max_bufsize, unsigned compression_level, c->impl = lzx_compress_lazy; c->max_search_depth = (36 * compression_level) / 20; - c->nice_match_length = min((72 * compression_level) / 20, - LZX_MAX_MATCH_LEN); + c->nice_match_length = (72 * compression_level) / 20; + /* lzx_compress_lazy() needs max_search_depth >= 2 because it + * halves the max_search_depth when attempting a lazy match, and + * max_search_depth cannot be 0. */ + if (c->max_search_depth < 2) + c->max_search_depth = 2; } else { /* Normal / high compression: Use near-optimal parsing. */ @@ -2003,8 +2063,7 @@ lzx_create_compressor(size_t max_bufsize, unsigned compression_level, /* Scale nice_match_length and max_search_depth with the * compression level. */ c->max_search_depth = (24 * compression_level) / 50; - c->nice_match_length = min((32 * compression_level) / 50, - LZX_MAX_MATCH_LEN); + c->nice_match_length = (32 * compression_level) / 50; /* Set a number of optimization passes appropriate for the * compression level. */ @@ -2028,10 +2087,15 @@ lzx_create_compressor(size_t max_bufsize, unsigned compression_level, if (compression_level >= 300) c->num_optim_passes++; } - - c->cache_overflow_mark = &c->match_cache[LZX_CACHE_LEN]; } + /* max_search_depth == 0 is invalid. */ + if (c->max_search_depth < 1) + c->max_search_depth = 1; + + if (c->nice_match_length > LZX_MAX_MATCH_LEN) + c->nice_match_length = LZX_MAX_MATCH_LEN; + lzx_init_offset_slot_fast(c); *c_ret = c; return 0;