X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;ds=sidebyside;f=src%2Flzms-compress.c;h=f417fa526d93de2ee17cd36467fdab70735960f7;hb=4dd45340f9fe3a533e0f1a9d6b79f8118e45ca2a;hp=6df108061e8518d22a3d2e238d8d1d6484e98b86;hpb=62e209e9aeaa36ba9e3c2a174428805b7264e0e7;p=wimlib diff --git a/src/lzms-compress.c b/src/lzms-compress.c index 6df10806..f417fa52 100644 --- a/src/lzms-compress.c +++ b/src/lzms-compress.c @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 2013 Eric Biggers + * Copyright (C) 2013, 2014 Eric Biggers * * This file is part of wimlib, a library for working with WIM files. * @@ -24,8 +24,10 @@ /* This a compressor for the LZMS compression format. More details about this * format can be found in lzms-decompress.c. * - * This is currently an unsophisticated implementation that is fast but does not - * attain the best compression ratios allowed by the format. + * Also see lzx-compress.c for general information about match-finding and + * match-choosing that also applies to this LZMS compressor. + * + * NOTE: this compressor currently does not code any delta matches. */ #ifdef HAVE_CONFIG_H @@ -39,8 +41,7 @@ #include "wimlib/compress_common.h" #include "wimlib/endianness.h" #include "wimlib/error.h" -#include "wimlib/lz_hash.h" -#include "wimlib/lz_sarray.h" +#include "wimlib/lz_mf.h" #include "wimlib/lzms.h" #include "wimlib/util.h" @@ -48,20 +49,6 @@ #include #include -#define LZMS_OPTIM_ARRAY_SIZE 1024 - -struct lzms_compressor; -struct lzms_adaptive_state { - struct lzms_lz_lru_queues lru; - u8 main_state; - u8 match_state; - u8 lz_match_state; - u8 lz_repeat_match_state[LZMS_NUM_RECENT_OFFSETS - 1]; -}; -#define LZ_ADAPTIVE_STATE struct lzms_adaptive_state -#define LZ_COMPRESSOR struct lzms_compressor -#include "wimlib/lz_optimal.h" - /* Stucture used for writing raw bits to the end of the LZMS-compressed data as * a series of 16-bit little endian coding units. */ struct lzms_output_bitstream { @@ -128,7 +115,7 @@ struct lzms_range_encoder { * lzms_range_encoder_raw. */ struct lzms_range_encoder_raw *rc; - /* Bits recently encoded by this range encoder. This are used as in + /* Bits recently encoded by this range encoder. This is used as an * index into @prob_entries. */ u32 state; @@ -140,9 +127,7 @@ struct lzms_range_encoder { struct lzms_probability_entry prob_entries[LZMS_MAX_NUM_STATES]; }; -/* Structure used for Huffman encoding, optionally encoding larger "values" as a - * Huffman symbol specifying a slot and a slot-dependent number of extra bits. - * */ +/* Structure used for Huffman encoding. */ struct lzms_huffman_encoder { /* Bitstream to write Huffman-encoded symbols and verbatim bits to. @@ -150,9 +135,6 @@ struct lzms_huffman_encoder { */ struct lzms_output_bitstream *os; - /* Pointer to the slot base table to use. */ - const u32 *slot_base_tab; - /* Number of symbols that have been written using this code far. Reset * to 0 whenever the code is rebuilt. */ u32 num_syms_written; @@ -172,7 +154,14 @@ struct lzms_huffman_encoder { u8 lens[LZMS_MAX_NUM_SYMS]; /* The codeword of each symbol in the Huffman code. */ - u16 codewords[LZMS_MAX_NUM_SYMS]; + u32 codewords[LZMS_MAX_NUM_SYMS]; +}; + +struct lzms_compressor_params { + u32 min_match_length; + u32 nice_match_length; + u32 max_search_depth; + u32 optim_array_length; }; /* State of the LZMS compressor. */ @@ -186,24 +175,24 @@ struct lzms_compressor { /* Size of the data in @buffer. */ u32 window_size; -#if 0 - /* Temporary array used by lz_analyze_block(); must be at least as long - * as the window. */ - u32 *prev_tab; -#endif - - /* Suffix array match-finder. */ - struct lz_sarray lz_sarray; + /* Lempel-Ziv match-finder. */ + struct lz_mf *mf; - struct raw_match matches[64]; + /* Temporary space to store found matches. */ + struct lz_match *matches; - /* Match-chooser. */ - struct lz_match_chooser mc; + /* Match-chooser data. */ + struct lzms_mc_pos_data *optimum; + unsigned optimum_cur_idx; + unsigned optimum_end_idx; /* Maximum block size this compressor instantiation allows. This is the * allocated size of @window. */ u32 max_block_size; + /* Compression parameters. */ + struct lzms_compressor_params params; + /* Raw range encoder which outputs to the beginning of the compressed * data buffer, proceeding forwards. */ struct lzms_range_encoder_raw rc; @@ -234,6 +223,28 @@ struct lzms_compressor { s32 last_target_usages[65536]; }; +struct lzms_mc_pos_data { + u32 cost; +#define MC_INFINITE_COST ((u32)~0UL) + union { + struct { + u32 link; + u32 match_offset; + } prev; + struct { + u32 link; + u32 match_offset; + } next; + }; + struct lzms_adaptive_state { + struct lzms_lz_lru_queues lru; + u8 main_state; + u8 match_state; + u8 lz_match_state; + u8 lz_repeat_match_state[LZMS_NUM_RECENT_OFFSETS - 1]; + } state; +}; + /* Initialize the output bitstream @os to write forwards to the specified * compressed data buffer @out that is @out_limit 16-bit integers long. */ static void @@ -460,29 +471,36 @@ lzms_huffman_encode_symbol(struct lzms_huffman_encoder *enc, u32 sym) } } -/* Encode a number as a Huffman symbol specifying a slot, plus a number of - * slot-dependent extra bits. */ static void -lzms_encode_value(struct lzms_huffman_encoder *enc, u32 value) +lzms_encode_length(struct lzms_huffman_encoder *enc, u32 length) { unsigned slot; unsigned num_extra_bits; u32 extra_bits; - LZMS_ASSERT(enc->slot_base_tab != NULL); + slot = lzms_get_length_slot(length); + + num_extra_bits = lzms_extra_length_bits[slot]; + + extra_bits = length - lzms_length_slot_base[slot]; + + lzms_huffman_encode_symbol(enc, slot); + lzms_output_bitstream_put_bits(enc->os, extra_bits, num_extra_bits); +} + +static void +lzms_encode_offset(struct lzms_huffman_encoder *enc, u32 offset) +{ + unsigned slot; + unsigned num_extra_bits; + u32 extra_bits; - slot = lzms_get_slot(value, enc->slot_base_tab, enc->num_syms); + slot = lzms_get_position_slot(offset); - /* Get the number of extra bits needed to represent the range of values - * that share the slot. */ - num_extra_bits = bsr32(enc->slot_base_tab[slot + 1] - - enc->slot_base_tab[slot]); + num_extra_bits = lzms_extra_position_bits[slot]; - /* Calculate the extra bits as the offset from the slot base. */ - extra_bits = value - enc->slot_base_tab[slot]; + extra_bits = offset - lzms_position_slot_base[slot]; - /* Output the slot (Huffman-encoded), then the extra bits (verbatim). - */ lzms_huffman_encode_symbol(enc, slot); lzms_output_bitstream_put_bits(enc->os, extra_bits, num_extra_bits); } @@ -541,7 +559,7 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset) /* Main bit: 1 = a match, not a literal. */ lzms_range_encode_bit(&ctx->main_range_encoder, 1); - /* Match bit: 0 = a LZ match, not a delta match. */ + /* Match bit: 0 = an LZ match, not a delta match. */ lzms_range_encode_bit(&ctx->match_range_encoder, 0); /* Determine if the offset can be represented as a recent offset. */ @@ -558,7 +576,7 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset) lzms_range_encode_bit(&ctx->lz_match_range_encoder, 0); /* Encode the match offset. */ - lzms_encode_value(&ctx->lz_offset_encoder, offset); + lzms_encode_offset(&ctx->lz_offset_encoder, offset); } else { int i; @@ -583,7 +601,7 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset) } /* Encode the match length. */ - lzms_encode_value(&ctx->length_encoder, length); + lzms_encode_length(&ctx->length_encoder, length); /* Save the match offset for later insertion at the front of the LZ * match offset LRU queue. */ @@ -592,69 +610,12 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset) lzms_end_encode_item(ctx, length); } -#if 0 -static void -lzms_record_literal(u8 literal, void *_ctx) -{ - struct lzms_compressor *ctx = _ctx; - - lzms_encode_literal(ctx, literal); -} - -static void -lzms_record_match(unsigned length, unsigned offset, void *_ctx) -{ - struct lzms_compressor *ctx = _ctx; - - lzms_encode_lz_match(ctx, length, offset); -} - -static void -lzms_fast_encode(struct lzms_compressor *ctx) -{ - static const struct lz_params lzms_lz_params = { - .min_match = 3, - .max_match = UINT_MAX, - .max_offset = UINT_MAX, - .nice_match = 64, - .good_match = 32, - .max_chain_len = 64, - .max_lazy_match = 258, - .too_far = 4096, - }; - - lz_analyze_block(ctx->window, - ctx->window_size, - lzms_record_match, - lzms_record_literal, - ctx, - &lzms_lz_params, - ctx->prev_tab); - -} -#endif - -/* Fast heuristic cost evaluation to use in the inner loop of the match-finder. - * Unlike lzms_get_lz_match_cost(), which does a true cost evaluation, this - * simply prioritize matches based on their offset. */ -static input_idx_t -lzms_lz_match_cost_fast(input_idx_t length, input_idx_t offset, const void *_lru) -{ - const struct lzms_lz_lru_queues *lru = _lru; - - for (input_idx_t i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++) - if (offset == lru->recent_offsets[i]) - return i; - - return offset; -} - #define LZMS_COST_SHIFT 5 /*#define LZMS_RC_COSTS_USE_FLOATING_POINT*/ static u32 -lzms_rc_costs[LZMS_PROBABILITY_MAX]; +lzms_rc_costs[LZMS_PROBABILITY_MAX + 1]; #ifdef LZMS_RC_COSTS_USE_FLOATING_POINT # include @@ -684,7 +645,7 @@ lzms_do_init_rc_costs(void) * really interpreted as 1 / 64 and 64 / 64 is really interpreted as * 63 / 64. */ - for (u32 i = 0; i < LZMS_PROBABILITY_MAX; i++) { + for (u32 i = 0; i <= LZMS_PROBABILITY_MAX; i++) { u32 prob = i; if (prob == 0) @@ -715,17 +676,9 @@ lzms_do_init_rc_costs(void) static void lzms_init_rc_costs(void) { - static bool done = false; - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - - if (!done) { - pthread_mutex_lock(&mutex); - if (!done) { - lzms_do_init_rc_costs(); - done = true; - } - pthread_mutex_unlock(&mutex); - } + static pthread_once_t once = PTHREAD_ONCE_INIT; + + pthread_once(&once, lzms_do_init_rc_costs); } /* @@ -747,11 +700,6 @@ lzms_rc_bit_cost(const struct lzms_range_encoder *enc, u8 *cur_state, int bit) *cur_state = (*cur_state << 1) | bit; - if (prob_zero == 0) - prob_zero = 1; - else if (prob_zero == LZMS_PROBABILITY_MAX) - prob_zero = LZMS_PROBABILITY_MAX - 1; - if (bit == 0) prob_correct = prob_zero; else @@ -766,20 +714,18 @@ lzms_huffman_symbol_cost(const struct lzms_huffman_encoder *enc, u32 sym) return enc->lens[sym] << LZMS_COST_SHIFT; } -/* Compute the cost to encode a number with lzms_encode_value(). */ static u32 -lzms_value_cost(const struct lzms_huffman_encoder *enc, u32 value) +lzms_offset_cost(const struct lzms_huffman_encoder *enc, u32 offset) { u32 slot; u32 num_extra_bits; u32 cost = 0; - slot = lzms_get_slot(value, enc->slot_base_tab, enc->num_syms); + slot = lzms_get_position_slot(offset); cost += lzms_huffman_symbol_cost(enc, slot); - num_extra_bits = bsr32(enc->slot_base_tab[slot + 1] - - enc->slot_base_tab[slot]); + num_extra_bits = lzms_extra_position_bits[slot]; cost += num_extra_bits << LZMS_COST_SHIFT; @@ -787,55 +733,40 @@ lzms_value_cost(const struct lzms_huffman_encoder *enc, u32 value) } static u32 -lzms_get_matches(struct lzms_compressor *ctx, - const struct lzms_adaptive_state *state, - struct raw_match **matches_ret) +lzms_get_length_cost(const struct lzms_huffman_encoder *enc, u32 length) { - u32 num_matches; - struct raw_match *matches = ctx->matches; - - num_matches = lz_sarray_get_matches(&ctx->lz_sarray, - matches, - lzms_lz_match_cost_fast, - &state->lru); -#if 0 - fprintf(stderr, "Pos %u: %u matches\n", - lz_sarray_get_pos(&ctx->lz_sarray) - 1, num_matches); - for (u32 i = 0; i < num_matches; i++) - fprintf(stderr, "\tLen %u Offset %u\n", matches[i].len, matches[i].offset); -#endif + u32 slot; + u32 num_extra_bits; + u32 cost = 0; -#ifdef ENABLE_LZMS_DEBUG - LZMS_ASSERT(lz_sarray_get_pos(&ctx->lz_sarray) > 0); - u32 curpos = lz_sarray_get_pos(&ctx->lz_sarray) - 1; - for (u32 i = 0; i < num_matches; i++) { - LZMS_ASSERT(matches[i].len <= ctx->window_size - curpos); - LZMS_ASSERT(matches[i].offset > 0); - LZMS_ASSERT(matches[i].offset <= curpos); - LZMS_ASSERT(!memcmp(&ctx->window[curpos], - &ctx->window[curpos - matches[i].offset], - matches[i].len)); - if (i < num_matches - 1) - LZMS_ASSERT(matches[i].len > matches[i + 1].len); + slot = lzms_get_length_slot(length); - } -#endif - *matches_ret = matches; - return num_matches; + cost += lzms_huffman_symbol_cost(enc, slot); + + num_extra_bits = lzms_extra_length_bits[slot]; + + cost += num_extra_bits << LZMS_COST_SHIFT; + + return cost; +} + +static u32 +lzms_get_matches(struct lzms_compressor *ctx, struct lz_match **matches_ret) +{ + *matches_ret = ctx->matches; + return lz_mf_get_matches(ctx->mf, ctx->matches); } static void -lzms_skip_bytes(struct lzms_compressor *ctx, input_idx_t n) +lzms_skip_bytes(struct lzms_compressor *ctx, u32 n) { - while (n--) - lz_sarray_skip_position(&ctx->lz_sarray); + lz_mf_skip_positions(ctx->mf, n); } static u32 -lzms_get_prev_literal_cost(struct lzms_compressor *ctx, - struct lzms_adaptive_state *state) +lzms_get_literal_cost(struct lzms_compressor *ctx, + struct lzms_adaptive_state *state, u8 literal) { - u8 literal = ctx->window[lz_sarray_get_pos(&ctx->lz_sarray) - 1]; u32 cost = 0; state->lru.upcoming_offset = 0; @@ -850,9 +781,8 @@ lzms_get_prev_literal_cost(struct lzms_compressor *ctx, } static u32 -lzms_get_lz_match_cost(struct lzms_compressor *ctx, - struct lzms_adaptive_state *state, - input_idx_t length, input_idx_t offset) +lzms_get_lz_match_cost_nolen(struct lzms_compressor *ctx, + struct lzms_adaptive_state *state, u32 offset) { u32 cost = 0; int recent_offset_idx; @@ -873,7 +803,7 @@ lzms_get_lz_match_cost(struct lzms_compressor *ctx, cost += lzms_rc_bit_cost(&ctx->lz_match_range_encoder, &state->lz_match_state, 0); - cost += lzms_value_cost(&ctx->lz_offset_encoder, offset); + cost += lzms_offset_cost(&ctx->lz_offset_encoder, offset); } else { int i; @@ -895,7 +825,6 @@ lzms_get_lz_match_cost(struct lzms_compressor *ctx, state->lru.recent_offsets[i] = state->lru.recent_offsets[i + 1]; } - cost += lzms_value_cost(&ctx->length_encoder, length); state->lru.upcoming_offset = offset; lzms_update_lz_lru_queues(&state->lru); @@ -903,25 +832,288 @@ lzms_get_lz_match_cost(struct lzms_compressor *ctx, return cost; } -static struct raw_match +static u32 +lzms_get_lz_match_cost(struct lzms_compressor *ctx, + struct lzms_adaptive_state *state, + u32 length, u32 offset) +{ + return lzms_get_lz_match_cost_nolen(ctx, state, offset) + + lzms_get_length_cost(&ctx->length_encoder, length); +} + +static struct lz_match +lzms_match_chooser_reverse_list(struct lzms_compressor *ctx, unsigned cur_pos) +{ + unsigned prev_link, saved_prev_link; + unsigned prev_match_offset, saved_prev_match_offset; + + ctx->optimum_end_idx = cur_pos; + + saved_prev_link = ctx->optimum[cur_pos].prev.link; + saved_prev_match_offset = ctx->optimum[cur_pos].prev.match_offset; + + do { + prev_link = saved_prev_link; + prev_match_offset = saved_prev_match_offset; + + saved_prev_link = ctx->optimum[prev_link].prev.link; + saved_prev_match_offset = ctx->optimum[prev_link].prev.match_offset; + + ctx->optimum[prev_link].next.link = cur_pos; + ctx->optimum[prev_link].next.match_offset = prev_match_offset; + + cur_pos = prev_link; + } while (cur_pos != 0); + + ctx->optimum_cur_idx = ctx->optimum[0].next.link; + + return (struct lz_match) + { .len = ctx->optimum_cur_idx, + .offset = ctx->optimum[0].next.match_offset, + }; +} + +/* This is similar to lzx_choose_near_optimal_match() in lzx-compress.c. + * Read that one if you want to understand it. */ +static struct lz_match lzms_get_near_optimal_match(struct lzms_compressor *ctx) { + u32 num_matches; + struct lz_match *matches; + struct lz_match match; + u32 longest_len; + u32 longest_rep_len; + u32 longest_rep_offset; + unsigned cur_pos; + unsigned end_pos; struct lzms_adaptive_state initial_state; + if (ctx->optimum_cur_idx != ctx->optimum_end_idx) { + match.len = ctx->optimum[ctx->optimum_cur_idx].next.link - + ctx->optimum_cur_idx; + match.offset = ctx->optimum[ctx->optimum_cur_idx].next.match_offset; + + ctx->optimum_cur_idx = ctx->optimum[ctx->optimum_cur_idx].next.link; + return match; + } + + ctx->optimum_cur_idx = 0; + ctx->optimum_end_idx = 0; + + longest_rep_len = ctx->params.min_match_length - 1; + if (lz_mf_get_position(ctx->mf) >= LZMS_MAX_INIT_RECENT_OFFSET) { + u32 limit = lz_mf_get_bytes_remaining(ctx->mf); + for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++) { + u32 offset = ctx->lru.lz.recent_offsets[i]; + const u8 *strptr = lz_mf_get_window_ptr(ctx->mf); + const u8 *matchptr = strptr - offset; + u32 len = 0; + while (len < limit && strptr[len] == matchptr[len]) + len++; + if (len > longest_rep_len) { + longest_rep_len = len; + longest_rep_offset = offset; + } + } + } + + if (longest_rep_len >= ctx->params.nice_match_length) { + lzms_skip_bytes(ctx, longest_rep_len); + return (struct lz_match) { + .len = longest_rep_len, + .offset = longest_rep_offset, + }; + } + + num_matches = lzms_get_matches(ctx, &matches); + + if (num_matches) { + longest_len = matches[num_matches - 1].len; + if (longest_len >= ctx->params.nice_match_length) { + lzms_skip_bytes(ctx, longest_len - 1); + return matches[num_matches - 1]; + } + } else { + longest_len = 1; + } + initial_state.lru = ctx->lru.lz; initial_state.main_state = ctx->main_range_encoder.state; initial_state.match_state = ctx->match_range_encoder.state; initial_state.lz_match_state = ctx->lz_match_range_encoder.state; for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS - 1; i++) - initial_state.lz_repeat_match_state[i] = - ctx->lz_repeat_match_range_encoders[i].state; - return lz_get_near_optimal_match(&ctx->mc, - lzms_get_matches, - lzms_skip_bytes, - lzms_get_prev_literal_cost, - lzms_get_lz_match_cost, - ctx, - &initial_state); + initial_state.lz_repeat_match_state[i] = ctx->lz_repeat_match_range_encoders[i].state; + + ctx->optimum[1].state = initial_state; + ctx->optimum[1].cost = lzms_get_literal_cost(ctx, + &ctx->optimum[1].state, + *(lz_mf_get_window_ptr(ctx->mf) - 1)); + ctx->optimum[1].prev.link = 0; + + for (u32 i = 0, len = 2; i < num_matches; i++) { + u32 offset = matches[i].offset; + struct lzms_adaptive_state state; + u32 position_cost; + + state = initial_state; + position_cost = 0; + position_cost += lzms_get_lz_match_cost_nolen(ctx, &state, offset); + + do { + u32 cost; + + cost = position_cost; + cost += lzms_get_length_cost(&ctx->length_encoder, len); + + ctx->optimum[len].state = state; + ctx->optimum[len].prev.link = 0; + ctx->optimum[len].prev.match_offset = offset; + ctx->optimum[len].cost = cost; + } while (++len <= matches[i].len); + } + end_pos = longest_len; + + if (longest_rep_len >= ctx->params.min_match_length) { + struct lzms_adaptive_state state; + u32 cost; + + while (end_pos < longest_rep_len) + ctx->optimum[++end_pos].cost = MC_INFINITE_COST; + + state = initial_state; + cost = lzms_get_lz_match_cost(ctx, + &state, + longest_rep_len, + longest_rep_offset); + if (cost <= ctx->optimum[longest_rep_len].cost) { + ctx->optimum[longest_rep_len].state = state; + ctx->optimum[longest_rep_len].prev.link = 0; + ctx->optimum[longest_rep_len].prev.match_offset = longest_rep_offset; + ctx->optimum[longest_rep_len].cost = cost; + } + } + + cur_pos = 0; + for (;;) { + u32 cost; + struct lzms_adaptive_state state; + + cur_pos++; + + if (cur_pos == end_pos || cur_pos == ctx->params.optim_array_length) + return lzms_match_chooser_reverse_list(ctx, cur_pos); + + longest_rep_len = ctx->params.min_match_length - 1; + if (lz_mf_get_position(ctx->mf) >= LZMS_MAX_INIT_RECENT_OFFSET) { + u32 limit = lz_mf_get_bytes_remaining(ctx->mf); + for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++) { + u32 offset = ctx->optimum[cur_pos].state.lru.recent_offsets[i]; + const u8 *strptr = lz_mf_get_window_ptr(ctx->mf); + const u8 *matchptr = strptr - offset; + u32 len = 0; + while (len < limit && strptr[len] == matchptr[len]) + len++; + if (len > longest_rep_len) { + longest_rep_len = len; + longest_rep_offset = offset; + } + } + } + + if (longest_rep_len >= ctx->params.nice_match_length) { + match = lzms_match_chooser_reverse_list(ctx, cur_pos); + + ctx->optimum[cur_pos].next.match_offset = longest_rep_offset; + ctx->optimum[cur_pos].next.link = cur_pos + longest_rep_len; + ctx->optimum_end_idx = cur_pos + longest_rep_len; + + lzms_skip_bytes(ctx, longest_rep_len); + + return match; + } + + num_matches = lzms_get_matches(ctx, &matches); + + if (num_matches) { + longest_len = matches[num_matches - 1].len; + if (longest_len >= ctx->params.nice_match_length) { + match = lzms_match_chooser_reverse_list(ctx, cur_pos); + + ctx->optimum[cur_pos].next.match_offset = + matches[num_matches - 1].offset; + ctx->optimum[cur_pos].next.link = cur_pos + longest_len; + ctx->optimum_end_idx = cur_pos + longest_len; + + lzms_skip_bytes(ctx, longest_len - 1); + + return match; + } + } else { + longest_len = 1; + } + + while (end_pos < cur_pos + longest_len) + ctx->optimum[++end_pos].cost = MC_INFINITE_COST; + + state = ctx->optimum[cur_pos].state; + cost = ctx->optimum[cur_pos].cost + + lzms_get_literal_cost(ctx, + &state, + *(lz_mf_get_window_ptr(ctx->mf) - 1)); + if (cost < ctx->optimum[cur_pos + 1].cost) { + ctx->optimum[cur_pos + 1].state = state; + ctx->optimum[cur_pos + 1].cost = cost; + ctx->optimum[cur_pos + 1].prev.link = cur_pos; + } + + for (u32 i = 0, len = 2; i < num_matches; i++) { + u32 offset = matches[i].offset; + struct lzms_adaptive_state state; + u32 position_cost; + + state = ctx->optimum[cur_pos].state; + position_cost = ctx->optimum[cur_pos].cost; + position_cost += lzms_get_lz_match_cost_nolen(ctx, &state, offset); + + do { + u32 cost; + + cost = position_cost; + cost += lzms_get_length_cost(&ctx->length_encoder, len); + + if (cost < ctx->optimum[cur_pos + len].cost) { + ctx->optimum[cur_pos + len].state = state; + ctx->optimum[cur_pos + len].prev.link = cur_pos; + ctx->optimum[cur_pos + len].prev.match_offset = offset; + ctx->optimum[cur_pos + len].cost = cost; + } + } while (++len <= matches[i].len); + } + + if (longest_rep_len >= ctx->params.min_match_length) { + + while (end_pos < cur_pos + longest_rep_len) + ctx->optimum[++end_pos].cost = MC_INFINITE_COST; + + state = ctx->optimum[cur_pos].state; + + cost = ctx->optimum[cur_pos].cost + + lzms_get_lz_match_cost(ctx, + &state, + longest_rep_len, + longest_rep_offset); + if (cost <= ctx->optimum[cur_pos + longest_rep_len].cost) { + ctx->optimum[cur_pos + longest_rep_len].state = + state; + ctx->optimum[cur_pos + longest_rep_len].prev.link = + cur_pos; + ctx->optimum[cur_pos + longest_rep_len].prev.match_offset = + longest_rep_offset; + ctx->optimum[cur_pos + longest_rep_len].cost = + cost; + } + } + } } /* @@ -929,31 +1121,26 @@ lzms_get_near_optimal_match(struct lzms_compressor *ctx) * * Notes: * - * - This uses near-optimal LZ parsing backed by a suffix-array match-finder. - * More details can be found in the corresponding files (lz_optimal.h, - * lz_sarray.{h,c}). - * - * - This does not output any delta matches. It would take a specialized - * algorithm to find them, then more code in lz_optimal.h and here to handle - * evaluating and outputting them. + * - This does not output any delta matches. * * - The costs of literals and matches are estimated using the range encoder * states and the semi-adaptive Huffman codes. Except for range encoding * states, costs are assumed to be constant throughout a single run of the - * parsing algorithm, which can parse up to LZMS_OPTIM_ARRAY_SIZE bytes of - * data. This introduces a source of inaccuracy because the probabilities and + * parsing algorithm, which can parse up to @optim_array_length bytes of data. + * This introduces a source of inaccuracy because the probabilities and * Huffman codes can change over this part of the data. */ static void -lzms_normal_encode(struct lzms_compressor *ctx) +lzms_encode(struct lzms_compressor *ctx) { - struct raw_match match; + struct lz_match match; - /* Load window into suffix array match-finder. */ - lz_sarray_load_window(&ctx->lz_sarray, ctx->window, ctx->window_size); + /* Load window into the match-finder. */ + lz_mf_load_window(ctx->mf, ctx->window, ctx->window_size); /* Reset the match-chooser. */ - lz_match_chooser_begin(&ctx->mc); + ctx->optimum_cur_idx = 0; + ctx->optimum_end_idx = 0; while (ctx->cur_window_pos != ctx->window_size) { match = lzms_get_near_optimal_match(ctx); @@ -980,12 +1167,10 @@ lzms_init_range_encoder(struct lzms_range_encoder *enc, static void lzms_init_huffman_encoder(struct lzms_huffman_encoder *enc, struct lzms_output_bitstream *os, - const u32 *slot_base_tab, unsigned num_syms, unsigned rebuild_freq) { enc->os = os; - enc->slot_base_tab = slot_base_tab; enc->num_syms_written = 0; enc->rebuild_freq = rebuild_freq; enc->num_syms = num_syms; @@ -1008,7 +1193,6 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen, /* Copy the uncompressed data into the @ctx->window buffer. */ memcpy(ctx->window, udata, ulen); - memset(&ctx->window[ulen], 0, 8); ctx->cur_window_pos = 0; ctx->window_size = ulen; @@ -1028,23 +1212,23 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen, /* Initialize Huffman encoders for each alphabet used in the compressed * representation. */ lzms_init_huffman_encoder(&ctx->literal_encoder, &ctx->os, - NULL, LZMS_NUM_LITERAL_SYMS, + LZMS_NUM_LITERAL_SYMS, LZMS_LITERAL_CODE_REBUILD_FREQ); lzms_init_huffman_encoder(&ctx->lz_offset_encoder, &ctx->os, - lzms_position_slot_base, num_position_slots, + num_position_slots, LZMS_LZ_OFFSET_CODE_REBUILD_FREQ); lzms_init_huffman_encoder(&ctx->length_encoder, &ctx->os, - lzms_length_slot_base, LZMS_NUM_LEN_SYMS, + LZMS_NUM_LEN_SYMS, LZMS_LENGTH_CODE_REBUILD_FREQ); lzms_init_huffman_encoder(&ctx->delta_offset_encoder, &ctx->os, - lzms_position_slot_base, num_position_slots, + num_position_slots, LZMS_DELTA_OFFSET_CODE_REBUILD_FREQ); lzms_init_huffman_encoder(&ctx->delta_power_encoder, &ctx->os, - NULL, LZMS_NUM_DELTA_POWER_SYMS, + LZMS_NUM_DELTA_POWER_SYMS, LZMS_DELTA_POWER_CODE_REBUILD_FREQ); /* Initialize range encoders, all of which wrap around the same @@ -1070,7 +1254,7 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen, &ctx->rc, LZMS_NUM_DELTA_REPEAT_MATCH_STATES); /* Initialize LRU match information. */ - lzms_init_lru_queues(&ctx->lru); + lzms_init_lru_queues(&ctx->lru); } /* Flush the output streams, prepare the final compressed data, and return its @@ -1120,6 +1304,107 @@ lzms_finalize(struct lzms_compressor *ctx, u8 *cdata, size_t csize_avail) return compressed_size; } + +static void +lzms_build_params(unsigned int compression_level, + struct lzms_compressor_params *lzms_params) +{ + lzms_params->min_match_length = (compression_level >= 50) ? 2 : 3; + lzms_params->nice_match_length = ((u64)compression_level * 32) / 50; + lzms_params->max_search_depth = ((u64)compression_level * 50) / 50; + lzms_params->optim_array_length = 224 + compression_level * 16; +} + +static void +lzms_build_mf_params(const struct lzms_compressor_params *lzms_params, + u32 max_window_size, struct lz_mf_params *mf_params) +{ + memset(mf_params, 0, sizeof(*mf_params)); + + mf_params->algorithm = LZ_MF_DEFAULT; + mf_params->max_window_size = max_window_size; + mf_params->min_match_len = lzms_params->min_match_length; + mf_params->max_search_depth = lzms_params->max_search_depth; + mf_params->nice_match_len = lzms_params->nice_match_length; +} + +static void +lzms_free_compressor(void *_ctx); + +static u64 +lzms_get_needed_memory(size_t max_block_size, unsigned int compression_level) +{ + struct lzms_compressor_params params; + + lzms_build_params(compression_level, ¶ms); + + u64 size = 0; + + size += sizeof(struct lzms_compressor); + size += max_block_size; + size += lz_mf_get_needed_memory(LZ_MF_DEFAULT, max_block_size); + size += params.max_search_depth * sizeof(struct lz_match); + size += (params.optim_array_length + params.nice_match_length) * + sizeof(struct lzms_mc_pos_data); + + return size; +} + +static int +lzms_create_compressor(size_t max_block_size, unsigned int compression_level, + void **ctx_ret) +{ + struct lzms_compressor *ctx; + struct lzms_compressor_params params; + struct lz_mf_params mf_params; + + if (max_block_size >= INT32_MAX) + return WIMLIB_ERR_INVALID_PARAM; + + lzms_build_params(compression_level, ¶ms); + lzms_build_mf_params(¶ms, max_block_size, &mf_params); + if (!lz_mf_params_valid(&mf_params)) + return WIMLIB_ERR_INVALID_PARAM; + + ctx = CALLOC(1, sizeof(struct lzms_compressor)); + if (!ctx) + goto oom; + + ctx->params = params; + ctx->max_block_size = max_block_size; + + ctx->window = MALLOC(max_block_size); + if (!ctx->window) + goto oom; + + ctx->mf = lz_mf_alloc(&mf_params); + if (!ctx->mf) + goto oom; + + ctx->matches = MALLOC(params.max_search_depth * sizeof(struct lz_match)); + if (!ctx->matches) + goto oom; + + ctx->optimum = MALLOC((params.optim_array_length + + params.nice_match_length) * + sizeof(struct lzms_mc_pos_data)); + if (!ctx->optimum) + goto oom; + + /* Initialize position and length slot data if not done already. */ + lzms_init_slots(); + + /* Initialize range encoding cost table if not done already. */ + lzms_init_rc_costs(); + + *ctx_ret = ctx; + return 0; + +oom: + lzms_free_compressor(ctx); + return WIMLIB_ERR_NOMEM; +} + static size_t lzms_compress(const void *uncompressed_data, size_t uncompressed_size, void *compressed_data, size_t compressed_size_avail, void *_ctx) @@ -1130,15 +1415,6 @@ lzms_compress(const void *uncompressed_data, size_t uncompressed_size, LZMS_DEBUG("uncompressed_size=%zu, compressed_size_avail=%zu", uncompressed_size, compressed_size_avail); - /* Make sure the uncompressed size is compatible with this compressor. - */ - if (uncompressed_size > ctx->max_block_size) { - LZMS_DEBUG("Can't compress %zu bytes: LZMS context " - "only supports %u bytes", - uncompressed_size, ctx->max_block_size); - return 0; - } - /* Don't bother compressing extremely small inputs. */ if (uncompressed_size < 4) { LZMS_DEBUG("Input too small to bother compressing."); @@ -1162,10 +1438,7 @@ lzms_compress(const void *uncompressed_data, size_t uncompressed_size, /* Compute and encode a literal/match sequence that decompresses to the * preprocessed data. */ - lzms_normal_encode(ctx); -#if 0 - lzms_fast_encode(ctx); -#endif + lzms_encode(ctx); /* Get and return the compressed data size. */ compressed_size = lzms_finalize(ctx, compressed_data, @@ -1179,47 +1452,6 @@ lzms_compress(const void *uncompressed_data, size_t uncompressed_size, LZMS_DEBUG("Compressed %zu => %zu bytes", uncompressed_size, compressed_size); -#if defined(ENABLE_VERIFY_COMPRESSION) || defined(ENABLE_LZMS_DEBUG) - /* Verify that we really get the same thing back when decompressing. */ - { - struct wimlib_decompressor *decompressor; - - LZMS_DEBUG("Verifying LZMS compression."); - - if (0 == wimlib_create_decompressor(WIMLIB_COMPRESSION_TYPE_LZMS, - ctx->max_block_size, - NULL, - &decompressor)) - { - int ret; - ret = wimlib_decompress(compressed_data, - compressed_size, - ctx->window, - uncompressed_size, - decompressor); - wimlib_free_decompressor(decompressor); - - if (ret) { - ERROR("Failed to decompress data we " - "compressed using LZMS algorithm"); - wimlib_assert(0); - return 0; - } - if (memcmp(uncompressed_data, ctx->window, - uncompressed_size)) - { - ERROR("Data we compressed using LZMS algorithm " - "didn't decompress to original"); - wimlib_assert(0); - return 0; - } - } else { - WARNING("Failed to create decompressor for " - "data verification!"); - } - } -#endif /* ENABLE_LZMS_DEBUG || ENABLE_VERIFY_COMPRESSION */ - return compressed_size; } @@ -1230,95 +1462,15 @@ lzms_free_compressor(void *_ctx) if (ctx) { FREE(ctx->window); -#if 0 - FREE(ctx->prev_tab); -#endif - lz_sarray_destroy(&ctx->lz_sarray); - lz_match_chooser_destroy(&ctx->mc); + lz_mf_free(ctx->mf); + FREE(ctx->matches); + FREE(ctx->optimum); FREE(ctx); } } -static const struct wimlib_lzms_compressor_params default_params = { - .hdr = sizeof(struct wimlib_lzms_compressor_params), - .min_match_length = 2, - .max_match_length = UINT32_MAX, - .nice_match_length = 32, - .max_search_depth = 50, - .max_matches_per_pos = 3, - .optim_array_length = 1024, -}; - -static int -lzms_create_compressor(size_t max_block_size, - const struct wimlib_compressor_params_header *_params, - void **ctx_ret) -{ - struct lzms_compressor *ctx; - const struct wimlib_lzms_compressor_params *params; - - if (max_block_size == 0 || max_block_size >= INT32_MAX) { - LZMS_DEBUG("Invalid max_block_size (%u)", max_block_size); - return WIMLIB_ERR_INVALID_PARAM; - } - - if (_params) - params = (const struct wimlib_lzms_compressor_params*)_params; - else - params = &default_params; - - if (params->max_match_length < params->min_match_length || - params->min_match_length < 2 || - params->optim_array_length == 0 || - min(params->max_match_length, params->nice_match_length) > 65536) { - LZMS_DEBUG("Invalid compression parameter!"); - return WIMLIB_ERR_INVALID_PARAM; - } - - ctx = CALLOC(1, sizeof(struct lzms_compressor)); - if (ctx == NULL) - goto oom; - - ctx->window = MALLOC(max_block_size + 8); - if (ctx->window == NULL) - goto oom; - -#if 0 - ctx->prev_tab = MALLOC(max_block_size * sizeof(ctx->prev_tab[0])); - if (ctx->prev_tab == NULL) - goto oom; -#endif - - if (!lz_sarray_init(&ctx->lz_sarray, max_block_size, - params->min_match_length, - params->max_match_length, - params->max_search_depth, - params->max_matches_per_pos)) - goto oom; - - if (!lz_match_chooser_init(&ctx->mc, - params->optim_array_length, - params->nice_match_length, - params->max_match_length)) - goto oom; - - /* Initialize position and length slot bases if not done already. */ - lzms_init_slot_bases(); - - /* Initialize range encoding cost table if not done already. */ - lzms_init_rc_costs(); - - ctx->max_block_size = max_block_size; - - *ctx_ret = ctx; - return 0; - -oom: - lzms_free_compressor(ctx); - return WIMLIB_ERR_NOMEM; -} - const struct compressor_ops lzms_compressor_ops = { + .get_needed_memory = lzms_get_needed_memory, .create_compressor = lzms_create_compressor, .compress = lzms_compress, .free_compressor = lzms_free_compressor,