X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzms_compress.c;h=1e2a4fce718d5da9cbfbbbf5ee6b0566eb3a5dd3;hp=ded8c0bb6cf231fa51e69547944d4a3a4a4f742a;hb=1bfcf1e9daf6ebcf8ff24817f05ac88ba29b3f47;hpb=3e8aa757aaa63297f0d54007adf46411778fb6a8 diff --git a/src/lzms_compress.c b/src/lzms_compress.c index ded8c0bb..1e2a4fce 100644 --- a/src/lzms_compress.c +++ b/src/lzms_compress.c @@ -25,20 +25,20 @@ # include "config.h" #endif +#include +#include +#include + #include "wimlib/compress_common.h" #include "wimlib/compressor_ops.h" #include "wimlib/endianness.h" #include "wimlib/error.h" -#include "wimlib/lz_mf.h" +#include "wimlib/lcpit_matchfinder.h" #include "wimlib/lz_repsearch.h" #include "wimlib/lzms_common.h" #include "wimlib/unaligned.h" #include "wimlib/util.h" -#include -#include -#include - /* Stucture used for writing raw bits as a series of 16-bit little endian coding * units. This starts at the *end* of the compressed data buffer and proceeds * backwards. */ @@ -144,7 +144,6 @@ struct lzms_huffman_encoder { struct lzms_compressor_params { u32 min_match_length; u32 nice_match_length; - u32 max_search_depth; u32 optim_array_length; }; @@ -159,7 +158,7 @@ struct lzms_compressor { u32 cur_window_size; /* Lempel-Ziv match-finder */ - struct lz_mf *mf; + struct lcpit_matchfinder mf; /* Temporary space to store found matches */ struct lz_match *matches; @@ -873,8 +872,8 @@ lzms_consider_lz_repeat_offset_match(const struct lzms_compressor *c, /* * Consider coding each match in @matches as an explicit offset match. * - * @matches must be sorted by strictly increasing length and strictly increasing - * offset. This is guaranteed by the match-finder. + * @matches must be sorted by strictly decreasing length. This is guaranteed by + * the match-finder. * * We consider each length from the minimum (2) to the longest * (matches[num_matches - 1].len). For each length, we consider only the @@ -905,7 +904,7 @@ lzms_consider_lz_explicit_offset_matches(const struct lzms_compressor *c, base_cost += lzms_rc_bit_cost(&c->lz_match_range_encoder, cur_optimum_ptr->state.lz_match_state, 0); len = 2; - i = 0; + i = num_matches - 1; do { position_cost = base_cost + lzms_lz_offset_cost(c, matches[i].offset); do { @@ -916,8 +915,8 @@ lzms_consider_lz_explicit_offset_matches(const struct lzms_compressor *c, << MC_OFFSET_SHIFT) | len; (cur_optimum_ptr + len)->cost = cost; } - } while (++len <= matches[i].len); - } while (++i != num_matches); + } while (++len <= matches[i].length); + } while (i--); } static void @@ -1039,8 +1038,7 @@ begin: for (;;) { /* Find explicit offset matches with the current position. */ - num_matches = lz_mf_get_matches(c->mf, c->matches); - + num_matches = lcpit_matchfinder_get_matches(&c->mf, c->matches); if (num_matches) { /* * Find the longest repeat offset match with the current @@ -1072,7 +1070,7 @@ begin: * choose it immediately. */ if (rep_max_len >= c->params.nice_match_length) { - lz_mf_skip_positions(c->mf, rep_max_len - 1); + lcpit_matchfinder_skip_bytes(&c->mf, rep_max_len - 1); window_ptr += rep_max_len; if (cur_optimum_ptr != c->optimum) @@ -1110,20 +1108,29 @@ begin: rep_max_len, rep_max_idx); } - longest_len = c->matches[num_matches - 1].len; + longest_len = c->matches[0].length; /* If there's a very long explicit offset match, choose * it immediately. */ if (longest_len >= c->params.nice_match_length) { - lz_mf_skip_positions(c->mf, longest_len - 1); + u32 offset = c->matches[0].offset; + + /* Extend the match as far as possible. (The + * LCP-interval tree matchfinder only reports up + * to the "nice" length.) */ + longest_len = lz_extend(window_ptr, + window_ptr - offset, + longest_len, + window_end - window_ptr); + + lcpit_matchfinder_skip_bytes(&c->mf, longest_len - 1); window_ptr += longest_len; if (cur_optimum_ptr != c->optimum) lzms_encode_item_list(c, cur_optimum_ptr); - lzms_encode_lz_explicit_offset_match(c, longest_len, - c->matches[num_matches - 1].offset); + lzms_encode_lz_explicit_offset_match(c, longest_len, offset); c->optimum[0].state = cur_optimum_ptr->state; @@ -1131,8 +1138,7 @@ begin: lzms_update_match_state(&c->optimum[0].state, 0); lzms_update_lz_match_state(&c->optimum[0].state, 0); - c->optimum[0].state.lru.upcoming_offset = - c->matches[num_matches - 1].offset; + c->optimum[0].state.lru.upcoming_offset = offset; lzms_update_lz_lru_queue(&c->optimum[0].state.lru); goto begin; @@ -1400,39 +1406,17 @@ lzms_build_params(unsigned int compression_level, else params->min_match_length = 3; - /* Scale nice_match_length and max_search_depth with the compression - * level. But to allow an optimization on length cost calculations, - * don't allow nice_match_length to exceed LZMS_NUM_FAST_LENGTH. */ + /* Scale nice_match_length with the compression level. But to allow an + * optimization on length cost calculations, don't allow + * nice_match_length to exceed LZMS_NUM_FAST_LENGTH. */ params->nice_match_length = ((u64)compression_level * 32) / 50; if (params->nice_match_length < params->min_match_length) params->nice_match_length = params->min_match_length; if (params->nice_match_length > LZMS_NUM_FAST_LENGTHS) params->nice_match_length = LZMS_NUM_FAST_LENGTHS; - params->max_search_depth = compression_level; - params->optim_array_length = 1024; } -/* Given the internal compression parameters and maximum window size, build the - * Lempel-Ziv match-finder parameters. */ -static void -lzms_build_mf_params(const struct lzms_compressor_params *lzms_params, - u32 max_window_size, struct lz_mf_params *mf_params) -{ - memset(mf_params, 0, sizeof(*mf_params)); - - /* Choose an appropriate match-finding algorithm. */ - if (max_window_size <= 33554432) - mf_params->algorithm = LZ_MF_LCP_INTERVAL_TREE; - else - mf_params->algorithm = LZ_MF_LINKED_SUFFIX_ARRAY; - - mf_params->max_window_size = max_window_size; - mf_params->min_match_len = lzms_params->min_match_length; - mf_params->max_search_depth = lzms_params->max_search_depth; - mf_params->nice_match_len = lzms_params->nice_match_length; -} - static void lzms_free_compressor(void *_c); @@ -1440,14 +1424,12 @@ static u64 lzms_get_needed_memory(size_t max_block_size, unsigned int compression_level) { struct lzms_compressor_params params; - struct lz_mf_params mf_params; u64 size = 0; if (max_block_size > LZMS_MAX_BUFFER_SIZE) return 0; lzms_build_params(compression_level, ¶ms); - lzms_build_mf_params(¶ms, max_block_size, &mf_params); size += sizeof(struct lzms_compressor); @@ -1455,10 +1437,10 @@ lzms_get_needed_memory(size_t max_block_size, unsigned int compression_level) size += max_block_size; /* mf */ - size += lz_mf_get_needed_memory(mf_params.algorithm, max_block_size); + size += lcpit_matchfinder_get_needed_memory(max_block_size); /* matches */ - size += min(params.max_search_depth, params.nice_match_length) * + size += (params.nice_match_length - params.min_match_length + 1) * sizeof(struct lz_match); /* optimum */ @@ -1474,15 +1456,11 @@ lzms_create_compressor(size_t max_block_size, unsigned int compression_level, { struct lzms_compressor *c; struct lzms_compressor_params params; - struct lz_mf_params mf_params; if (max_block_size > LZMS_MAX_BUFFER_SIZE) return WIMLIB_ERR_INVALID_PARAM; lzms_build_params(compression_level, ¶ms); - lzms_build_mf_params(¶ms, max_block_size, &mf_params); - if (!lz_mf_params_valid(&mf_params)) - return WIMLIB_ERR_INVALID_PARAM; c = CALLOC(1, sizeof(struct lzms_compressor)); if (!c) @@ -1494,12 +1472,12 @@ lzms_create_compressor(size_t max_block_size, unsigned int compression_level, if (!c->cur_window) goto oom; - c->mf = lz_mf_alloc(&mf_params); - if (!c->mf) + if (!lcpit_matchfinder_init(&c->mf, max_block_size, + c->params.min_match_length, + c->params.nice_match_length)) goto oom; - c->matches = MALLOC(min(params.max_search_depth, - params.nice_match_length) * + c->matches = MALLOC((params.nice_match_length - params.min_match_length + 1) * sizeof(struct lz_match)); if (!c->matches) goto oom; @@ -1549,7 +1527,7 @@ lzms_compress(const void *uncompressed_data, size_t uncompressed_size, c->last_target_usages, false); /* Load the window into the match-finder. */ - lz_mf_load_window(c->mf, c->cur_window, c->cur_window_size); + lcpit_matchfinder_load_buffer(&c->mf, c->cur_window, c->cur_window_size); /* Compute and encode a literal/match sequence that decompresses to the * preprocessed data. */ @@ -1566,7 +1544,7 @@ lzms_free_compressor(void *_c) if (c) { FREE(c->cur_window); - lz_mf_free(c->mf); + lcpit_matchfinder_destroy(&c->mf); FREE(c->matches); FREE(c->optimum); FREE(c);