X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzms-compress.c;h=3b0caab4a963e75cd60161d2fb5ddf5a05b8ffe2;hp=f7ef633ea0355829f463cf9e71d50e244f1bb88c;hb=394751ae13025edab605cd61c8e32819e3fb33a1;hpb=e940fda88a92ff9e931ec88fb4c0e1ebd6fa2dfb

diff --git a/src/lzms-compress.c b/src/lzms-compress.c
index f7ef633e..3b0caab4 100644
--- a/src/lzms-compress.c
+++ b/src/lzms-compress.c
@@ -24,8 +24,7 @@
 /* This is a compressor for the LZMS compression format. More details about this
  * format can be found in lzms-decompress.c.
  *
- * This is currently an unsophisticated implementation that is fast but does not
- * attain the best compression ratios allowed by the format.
+ * NOTE: this compressor currently does not code any delta matches.
  */

 #ifdef HAVE_CONFIG_H
@@ -39,12 +38,25 @@
 #include "wimlib/compress_common.h"
 #include "wimlib/endianness.h"
 #include "wimlib/error.h"
-#include "wimlib/lz_hash.h"
+#include "wimlib/lz_sarray.h"
 #include "wimlib/lzms.h"
 #include "wimlib/util.h"

 #include <string.h>
 #include <limits.h>
+#include <pthread.h>
+
+struct lzms_compressor;
+struct lzms_adaptive_state {
+	struct lzms_lz_lru_queues lru;
+	u8 main_state;
+	u8 match_state;
+	u8 lz_match_state;
+	u8 lz_repeat_match_state[LZMS_NUM_RECENT_OFFSETS - 1];
+};
+#define LZ_ADAPTIVE_STATE struct lzms_adaptive_state
+#define LZ_COMPRESSOR struct lzms_compressor
+#include "wimlib/lz_optimal.h"
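The `#define LZ_ADAPTIVE_STATE'/`#define LZ_COMPRESSOR' pair followed by `#include "wimlib/lz_optimal.h"' above is a macro-template: lz_optimal.h expands against whatever types the including file has just defined, stamping out a match-chooser specialized for this compressor at compile time. The same pattern in miniature, with hypothetical names not taken from lz_optimal.h:

	/* square_template.h -- expects the includer to define SQ_TYPE first */
	#ifndef SQ_TYPE
	#  error "define SQ_TYPE before including square_template.h"
	#endif
	static inline SQ_TYPE
	sq_square(SQ_TYPE x)
	{
		return x * x;		/* specialized to the includer's type */
	}

	/* An includer, analogous to the block above: */
	#define SQ_TYPE u32
	#include "square_template.h"	/* sq_square(u32) now exists here */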
 
 /* Structure used for writing raw bits to the end of the LZMS-compressed data as
  * a series of 16-bit little endian coding units. */
@@ -124,9 +136,7 @@ struct lzms_range_encoder {
 	struct lzms_probability_entry prob_entries[LZMS_MAX_NUM_STATES];
 };
 
-/* Structure used for Huffman encoding, optionally encoding larger "values" as a
- * Huffman symbol specifying a slot and a slot-dependent number of extra bits.
- * */
+/* Structure used for Huffman encoding. */
 struct lzms_huffman_encoder {
 
 	/* Bitstream to write Huffman-encoded symbols and verbatim bits to.
@@ -134,9 +144,6 @@ struct lzms_huffman_encoder {
 	 */
 	struct lzms_output_bitstream *os;
 
-	/* Pointer to the slot base table to use. */
-	const u32 *slot_base_tab;
-
 	/* Number of symbols that have been written using this code so far.
	 * Reset to 0 whenever the code is rebuilt. */
 	u32 num_syms_written;
@@ -156,7 +163,7 @@ struct lzms_huffman_encoder {
 	u8 lens[LZMS_MAX_NUM_SYMS];
 
 	/* The codeword of each symbol in the Huffman code. */
-	u16 codewords[LZMS_MAX_NUM_SYMS];
+	u32 codewords[LZMS_MAX_NUM_SYMS];
 };
 
 /* State of the LZMS compressor. */
 struct lzms_compressor {
@@ -170,9 +177,14 @@ struct lzms_compressor {
 	/* Size of the data in @window. */
 	u32 window_size;
 
-	/* Temporary array used by lz_analyze_block(); must be at least as long
-	 * as the window. */
-	u32 *prev_tab;
+	/* Suffix array match-finder. */
+	struct lz_sarray lz_sarray;
+
+	/* Temporary space to store found matches. */
+	struct raw_match *matches;
+
+	/* Match-chooser. */
+	struct lz_match_chooser mc;
 
 	/* Maximum block size this compressor instantiation allows. This is the
 	 * allocated size of @window. */
@@ -201,33 +213,13 @@ struct lzms_compressor {
 	struct lzms_huffman_encoder delta_power_encoder;
 	struct lzms_huffman_encoder delta_offset_encoder;
 
-	/* LRU (least-recently-used) queue of LZ match offsets. */
-	u64 recent_lz_offsets[LZMS_NUM_RECENT_OFFSETS + 1];
-
-	/* LRU (least-recently-used) queue of delta match powers. */
-	u32 recent_delta_powers[LZMS_NUM_RECENT_OFFSETS + 1];
-
-	/* LRU (least-recently-used) queue of delta match offsets. */
-	u32 recent_delta_offsets[LZMS_NUM_RECENT_OFFSETS + 1];
-
-	/* These variables are used to delay updates to the LRU queues by one
-	 * decoded item. */
-	u32 prev_lz_offset;
-	u32 prev_delta_power;
-	u32 prev_delta_offset;
-	u32 upcoming_lz_offset;
-	u32 upcoming_delta_power;
-	u32 upcoming_delta_offset;
+	/* LRU (least-recently-used) queues for match information. */
+	struct lzms_lru_queues lru;
 
 	/* Used for preprocessing. */
 	s32 last_target_usages[65536];
 };
 
-struct lzms_match {
-	u32 length;
-	u32 offset;
-};
-
 /* Initialize the output bitstream @os to write forwards to the specified
  * compressed data buffer @out that is @out_limit 16-bit integers long. */
 static void
@@ -432,7 +424,11 @@ static void
 lzms_huffman_encode_symbol(struct lzms_huffman_encoder *enc, u32 sym)
 {
 	LZMS_ASSERT(sym < enc->num_syms);
-	if (enc->num_syms_written == enc->rebuild_freq) {
+	lzms_output_bitstream_put_bits(enc->os,
+				       enc->codewords[sym],
+				       enc->lens[sym]);
+	++enc->sym_freqs[sym];
+	if (++enc->num_syms_written == enc->rebuild_freq) {
 		/* Adaptive code needs to be rebuilt. */
 		LZMS_DEBUG("Rebuilding code (num_syms=%u)", enc->num_syms);
 		make_canonical_huffman_code(enc->num_syms,
@@ -448,36 +444,38 @@ lzms_huffman_encode_symbol(struct lzms_huffman_encoder *enc, u32 sym)
 		}
 		enc->num_syms_written = 0;
 	}
-	lzms_output_bitstream_put_bits(enc->os,
-				       enc->codewords[sym],
-				       enc->lens[sym]);
-	++enc->num_syms_written;
-	++enc->sym_freqs[sym];
 }
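The reordering in the hunk above matters for encoder/decoder synchronization: the symbol that triggers a rebuild must itself be coded with the old code, because the decoder can only rebuild after it has decoded that symbol. A minimal sketch of the discipline, with hypothetical helper names:

	void encode_sym(struct coder *c, unsigned sym)
	{
		emit_bits(c, c->codewords[sym], c->lens[sym]);	/* old code */
		c->freqs[sym]++;
		if (++c->count == c->rebuild_freq) {
			rebuild_code(c);	/* decoder rebuilds at the
						 * same symbol count */
			c->count = 0;
		}
	}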
 
-/* Encode a number as a Huffman symbol specifying a slot, plus a number of
- * slot-dependent extra bits. */
 static void
-lzms_encode_value(struct lzms_huffman_encoder *enc, u32 value)
+lzms_encode_length(struct lzms_huffman_encoder *enc, u32 length)
 {
 	unsigned slot;
 	unsigned num_extra_bits;
 	u32 extra_bits;
 
-	LZMS_ASSERT(enc->slot_base_tab != NULL);
+	slot = lzms_get_length_slot(length);
 
-	slot = lzms_get_slot(value, enc->slot_base_tab, enc->num_syms);
+	num_extra_bits = lzms_extra_length_bits[slot];
 
-	/* Get the number of extra bits needed to represent the range of values
-	 * that share the slot. */
-	num_extra_bits = bsr32(enc->slot_base_tab[slot + 1] -
-			       enc->slot_base_tab[slot]);
+	extra_bits = length - lzms_length_slot_base[slot];
 
-	/* Calculate the extra bits as the offset from the slot base. */
-	extra_bits = value - enc->slot_base_tab[slot];
+	lzms_huffman_encode_symbol(enc, slot);
+	lzms_output_bitstream_put_bits(enc->os, extra_bits, num_extra_bits);
+}
+
+static void
+lzms_encode_offset(struct lzms_huffman_encoder *enc, u32 offset)
+{
+	unsigned slot;
+	unsigned num_extra_bits;
+	u32 extra_bits;
+
+	slot = lzms_get_position_slot(offset);
+
+	num_extra_bits = lzms_extra_position_bits[slot];
+
+	extra_bits = offset - lzms_position_slot_base[slot];
 
-	/* Output the slot (Huffman-encoded), then the extra bits (verbatim). */
 	lzms_huffman_encode_symbol(enc, slot);
 	lzms_output_bitstream_put_bits(enc->os, extra_bits, num_extra_bits);
 }
 
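Both new functions code a value as a Huffman-coded slot plus verbatim extra bits, where value == slot_base[slot] + extra_bits. A worked example with made-up table entries (the real tables live elsewhere in the lzms code):

	/* Suppose lzms_position_slot_base[5] == 96 and
	 * lzms_extra_position_bits[5] == 5 (hypothetical values). Then
	 * offset 100 is coded as Huffman symbol 5 followed by the five
	 * verbatim bits 00100 (100 - 96 == 4), and the decoder recovers
	 * 96 + 4 == 100. */
	u32 decode_offset(unsigned slot, u32 extra_bits)	/* sketch */
	{
		return lzms_position_slot_base[slot] + extra_bits;
	}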
@@ -485,9 +483,9 @@ static void
 lzms_begin_encode_item(struct lzms_compressor *ctx)
 {
-	ctx->upcoming_delta_offset = 0;
-	ctx->upcoming_lz_offset = 0;
-	ctx->upcoming_delta_power = 0;
+	ctx->lru.lz.upcoming_offset = 0;
+	ctx->lru.delta.upcoming_offset = 0;
+	ctx->lru.delta.upcoming_power = 0;
 }
 
 static void
@@ -495,26 +493,7 @@ lzms_end_encode_item(struct lzms_compressor *ctx, u32 length)
 {
 	LZMS_ASSERT(ctx->window_size - ctx->cur_window_pos >= length);
 	ctx->cur_window_pos += length;
-
-	/* Update LRU queues */
-	if (ctx->prev_lz_offset != 0) {
-		for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--)
-			ctx->recent_lz_offsets[i + 1] = ctx->recent_lz_offsets[i];
-		ctx->recent_lz_offsets[0] = ctx->prev_lz_offset;
-	}
-
-	if (ctx->prev_delta_offset != 0) {
-		for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--) {
-			ctx->recent_delta_powers[i + 1] = ctx->recent_delta_powers[i];
-			ctx->recent_delta_offsets[i + 1] = ctx->recent_delta_offsets[i];
-		}
-		ctx->recent_delta_powers[0] = ctx->prev_delta_power;
-		ctx->recent_delta_offsets[0] = ctx->prev_delta_offset;
-	}
-
-	ctx->prev_lz_offset = ctx->upcoming_lz_offset;
-	ctx->prev_delta_offset = ctx->upcoming_delta_offset;
-	ctx->prev_delta_power = ctx->upcoming_delta_power;
+	lzms_update_lru_queues(&ctx->lru);
 }
 
 /* Encode a literal byte. */
@@ -541,38 +520,44 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset)
 {
 	int recent_offset_idx;
 
-	lzms_begin_encode_item(ctx);
-
 	LZMS_DEBUG("Position %u: Encoding LZ match {length=%u, offset=%u}",
 		   ctx->cur_window_pos, length, offset);
 
+	LZMS_ASSERT(length <= ctx->window_size - ctx->cur_window_pos);
+	LZMS_ASSERT(offset <= ctx->cur_window_pos);
+	LZMS_ASSERT(!memcmp(&ctx->window[ctx->cur_window_pos],
+			    &ctx->window[ctx->cur_window_pos - offset],
+			    length));
+
+	lzms_begin_encode_item(ctx);
+
 	/* Main bit: 1 = a match, not a literal. */
 	lzms_range_encode_bit(&ctx->main_range_encoder, 1);
 
-	/* Match bit: 0 = a LZ match, not a delta match. */
+	/* Match bit: 0 = an LZ match, not a delta match. */
 	lzms_range_encode_bit(&ctx->match_range_encoder, 0);
 
 	/* Determine if the offset can be represented as a recent offset. */
 	for (recent_offset_idx = 0;
 	     recent_offset_idx < LZMS_NUM_RECENT_OFFSETS;
 	     recent_offset_idx++)
-		if (offset == ctx->recent_lz_offsets[recent_offset_idx])
+		if (offset == ctx->lru.lz.recent_offsets[recent_offset_idx])
 			break;
 
 	if (recent_offset_idx == LZMS_NUM_RECENT_OFFSETS) {
 		/* Explicit offset. */
 
-		/* LZ match bit: 0 = explicit offset, not a repeat offset. */
+		/* LZ match bit: 0 = explicit offset, not a recent offset. */
 		lzms_range_encode_bit(&ctx->lz_match_range_encoder, 0);
 
 		/* Encode the match offset. */
-		lzms_encode_value(&ctx->lz_offset_encoder, offset);
+		lzms_encode_offset(&ctx->lz_offset_encoder, offset);
 	} else {
 		int i;
 
-		/* Repeat offset. */
+		/* Recent offset. */
 
-		/* LZ match bit: 1 = repeat offset, not an explicit offset. */
+		/* LZ match bit: 1 = recent offset, not an explicit offset. */
 		lzms_range_encode_bit(&ctx->lz_match_range_encoder, 1);
 
 		/* Encode the recent offset index. A 1 bit is encoded for each
@@ -587,91 +572,334 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset)
 
 		/* Initial update of the LZ match offset LRU queue. */
 		for (; i < LZMS_NUM_RECENT_OFFSETS; i++)
-			ctx->recent_lz_offsets[i] = ctx->recent_lz_offsets[i + 1];
+			ctx->lru.lz.recent_offsets[i] = ctx->lru.lz.recent_offsets[i + 1];
 	}
 
 	/* Encode the match length. */
-	lzms_encode_value(&ctx->length_encoder, length);
+	lzms_encode_length(&ctx->length_encoder, length);
 
 	/* Save the match offset for later insertion at the front of the LZ
 	 * match offset LRU queue. */
-	ctx->upcoming_lz_offset = offset;
+	ctx->lru.lz.upcoming_offset = offset;
 
 	lzms_end_encode_item(ctx, length);
 }
 
-static void
-lzms_record_literal(u8 literal, void *_ctx)
+/* Fast heuristic cost evaluation to use in the inner loop of the match-finder.
+ * Unlike lzms_get_lz_match_cost(), which does a true cost evaluation, this
+ * simply prioritizes matches based on their offset. */
+static input_idx_t
+lzms_lz_match_cost_fast(input_idx_t length, input_idx_t offset, const void *_lru)
 {
-	struct lzms_compressor *ctx = _ctx;
+	const struct lzms_lz_lru_queues *lru = _lru;
+
+	for (input_idx_t i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++)
+		if (offset == lru->recent_offsets[i])
+			return i;
 
-	lzms_encode_literal(ctx, literal);
+	return offset;
 }
 
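Because the match-finder calls this heuristic for every candidate, it has to be cheap: a recent offset scores its queue index (0, 1, ...), anything else scores its raw offset, so equal-length candidates sort toward the repeat queue and toward small offsets. An illustration with assumed values:

	/* Assume LZMS_NUM_RECENT_OFFSETS == 3 and
	 * lru->recent_offsets == { 100, 512, 3 } (made-up contents):
	 *
	 *   offset 100  -> cost 0     (recent offset, queue index 0)
	 *   offset 512  -> cost 1     (recent offset, queue index 1)
	 *   offset 3    -> cost 2     (recent offset, queue index 2)
	 *   offset 4096 -> cost 4096  (explicit offset)
	 */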
+ */ + for (u32 i = 0; i <= LZMS_PROBABILITY_MAX; i++) { + u32 prob = i; + + if (prob == 0) + prob = 1; + else if (prob == LZMS_PROBABILITY_MAX) + prob = LZMS_PROBABILITY_MAX - 1; + + #ifdef LZMS_RC_COSTS_USE_FLOATING_POINT + lzms_rc_costs[i] = log2((double)LZMS_PROBABILITY_MAX / prob) * + (1 << LZMS_COST_SHIFT); + #else + u32 w = prob; + u32 bit_count = 0; + for (u32 j = 0; j < LZMS_COST_SHIFT; j++) { + w *= w; + bit_count <<= 1; + while (w >= (1U << 16)) { + w >>= 1; + ++bit_count; + } + } + lzms_rc_costs[i] = (LZMS_PROBABILITY_BITS << LZMS_COST_SHIFT) - + (15 + bit_count); + #endif + } } static void -lzms_fast_encode(struct lzms_compressor *ctx) +lzms_init_rc_costs(void) { - static const struct lz_params lzms_lz_params = { - .min_match = 3, - .max_match = UINT_MAX, - .max_offset = UINT_MAX, - .nice_match = 64, - .good_match = 32, - .max_chain_len = 64, - .max_lazy_match = 258, - .too_far = 4096, - }; + static bool done = false; + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + + if (unlikely(!done)) { + pthread_mutex_lock(&mutex); + if (!done) { + lzms_do_init_rc_costs(); + done = true; + } + pthread_mutex_unlock(&mutex); + } +} + +/* + * Return the cost to range-encode the specified bit when in the specified + * state. + * + * @enc The range encoder to use. + * @cur_state Current state, which indicates the probability entry to choose. + * Updated by this function. + * @bit The bit to encode (0 or 1). + */ +static u32 +lzms_rc_bit_cost(const struct lzms_range_encoder *enc, u8 *cur_state, int bit) +{ + u32 prob_zero; + u32 prob_correct; + + prob_zero = enc->prob_entries[*cur_state & enc->mask].num_recent_zero_bits; + + *cur_state = (*cur_state << 1) | bit; + + if (bit == 0) + prob_correct = prob_zero; + else + prob_correct = LZMS_PROBABILITY_MAX - prob_zero; + + return lzms_rc_costs[prob_correct]; +} + +static u32 +lzms_huffman_symbol_cost(const struct lzms_huffman_encoder *enc, u32 sym) +{ + return enc->lens[sym] << LZMS_COST_SHIFT; +} + +static u32 +lzms_offset_cost(const struct lzms_huffman_encoder *enc, u32 offset) +{ + u32 slot; + u32 num_extra_bits; + u32 cost = 0; + + slot = lzms_get_position_slot(offset); + + cost += lzms_huffman_symbol_cost(enc, slot); + + num_extra_bits = lzms_extra_position_bits[slot]; + + cost += num_extra_bits << LZMS_COST_SHIFT; + + return cost; +} + +static u32 +lzms_length_cost(const struct lzms_huffman_encoder *enc, u32 length) +{ + u32 slot; + u32 num_extra_bits; + u32 cost = 0; + + slot = lzms_get_length_slot(length); - lz_analyze_block(ctx->window, - ctx->window_size, - lzms_record_match, - lzms_record_literal, - ctx, - &lzms_lz_params, - ctx->prev_tab); + cost += lzms_huffman_symbol_cost(enc, slot); + num_extra_bits = lzms_extra_length_bits[slot]; + + cost += num_extra_bits << LZMS_COST_SHIFT; + + return cost; } -#if 0 +static u32 +lzms_get_matches(struct lzms_compressor *ctx, + const struct lzms_adaptive_state *state, + struct raw_match **matches_ret) +{ + *matches_ret = ctx->matches; + return lz_sarray_get_matches(&ctx->lz_sarray, + ctx->matches, + lzms_lz_match_cost_fast, + &state->lru); +} -static struct lzms_match -lzms_get_best_match(struct lzms_compressor *ctx) +static void +lzms_skip_bytes(struct lzms_compressor *ctx, input_idx_t n) { - struct lzms_match match; + while (n--) + lz_sarray_skip_position(&ctx->lz_sarray); +} - /* TODO */ +static u32 +lzms_get_prev_literal_cost(struct lzms_compressor *ctx, + struct lzms_adaptive_state *state) +{ + u8 literal = ctx->window[lz_sarray_get_pos(&ctx->lz_sarray) - 1]; + u32 cost = 0; + + 
 static void
-lzms_fast_encode(struct lzms_compressor *ctx)
+lzms_init_rc_costs(void)
 {
-	static const struct lz_params lzms_lz_params = {
-		.min_match = 3,
-		.max_match = UINT_MAX,
-		.max_offset = UINT_MAX,
-		.nice_match = 64,
-		.good_match = 32,
-		.max_chain_len = 64,
-		.max_lazy_match = 258,
-		.too_far = 4096,
-	};
+	static bool done = false;
+	static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
+	if (unlikely(!done)) {
+		pthread_mutex_lock(&mutex);
+		if (!done) {
+			lzms_do_init_rc_costs();
+			done = true;
+		}
+		pthread_mutex_unlock(&mutex);
+	}
+}
+
+/*
+ * Return the cost to range-encode the specified bit when in the specified
+ * state.
+ *
+ * @enc		The range encoder to use.
+ * @cur_state	Current state, which indicates the probability entry to choose.
+ *		Updated by this function.
+ * @bit		The bit to encode (0 or 1).
+ */
+static u32
+lzms_rc_bit_cost(const struct lzms_range_encoder *enc, u8 *cur_state, int bit)
+{
+	u32 prob_zero;
+	u32 prob_correct;
+
+	prob_zero = enc->prob_entries[*cur_state & enc->mask].num_recent_zero_bits;
+
+	*cur_state = (*cur_state << 1) | bit;
+
+	if (bit == 0)
+		prob_correct = prob_zero;
+	else
+		prob_correct = LZMS_PROBABILITY_MAX - prob_zero;
+
+	return lzms_rc_costs[prob_correct];
+}
+
+static u32
+lzms_huffman_symbol_cost(const struct lzms_huffman_encoder *enc, u32 sym)
+{
+	return enc->lens[sym] << LZMS_COST_SHIFT;
+}
+
+static u32
+lzms_offset_cost(const struct lzms_huffman_encoder *enc, u32 offset)
+{
+	u32 slot;
+	u32 num_extra_bits;
+	u32 cost = 0;
+
+	slot = lzms_get_position_slot(offset);
+
+	cost += lzms_huffman_symbol_cost(enc, slot);
+
+	num_extra_bits = lzms_extra_position_bits[slot];
+
+	cost += num_extra_bits << LZMS_COST_SHIFT;
+
+	return cost;
+}
+
+static u32
+lzms_length_cost(const struct lzms_huffman_encoder *enc, u32 length)
+{
+	u32 slot;
+	u32 num_extra_bits;
+	u32 cost = 0;
+
+	slot = lzms_get_length_slot(length);
 
-	lz_analyze_block(ctx->window,
-			 ctx->window_size,
-			 lzms_record_match,
-			 lzms_record_literal,
-			 ctx,
-			 &lzms_lz_params,
-			 ctx->prev_tab);
+	cost += lzms_huffman_symbol_cost(enc, slot);
 
+	num_extra_bits = lzms_extra_length_bits[slot];
+
+	cost += num_extra_bits << LZMS_COST_SHIFT;
+
+	return cost;
 }
 
-#if 0
+static u32
+lzms_get_matches(struct lzms_compressor *ctx,
+		 const struct lzms_adaptive_state *state,
+		 struct raw_match **matches_ret)
+{
+	*matches_ret = ctx->matches;
+	return lz_sarray_get_matches(&ctx->lz_sarray,
+				     ctx->matches,
+				     lzms_lz_match_cost_fast,
+				     &state->lru);
+}
 
-static struct lzms_match
-lzms_get_best_match(struct lzms_compressor *ctx)
+static void
+lzms_skip_bytes(struct lzms_compressor *ctx, input_idx_t n)
 {
-	struct lzms_match match;
+	while (n--)
+		lz_sarray_skip_position(&ctx->lz_sarray);
+}
 
-	/* TODO */
+static u32
+lzms_get_prev_literal_cost(struct lzms_compressor *ctx,
+			   struct lzms_adaptive_state *state)
+{
+	u8 literal = ctx->window[lz_sarray_get_pos(&ctx->lz_sarray) - 1];
+	u32 cost = 0;
+
+	state->lru.upcoming_offset = 0;
+	lzms_update_lz_lru_queues(&state->lru);
+
+	cost += lzms_rc_bit_cost(&ctx->main_range_encoder,
+				 &state->main_state, 0);
 
-	match.length = 0;
+	cost += lzms_huffman_symbol_cost(&ctx->literal_encoder, literal);
+
+	return cost;
+}
 
-	return match;
+static u32
+lzms_get_lz_match_cost(struct lzms_compressor *ctx,
+		       struct lzms_adaptive_state *state,
+		       input_idx_t length, input_idx_t offset)
+{
+	u32 cost = 0;
+	int recent_offset_idx;
+
+	cost += lzms_rc_bit_cost(&ctx->main_range_encoder,
+				 &state->main_state, 1);
+	cost += lzms_rc_bit_cost(&ctx->match_range_encoder,
+				 &state->match_state, 0);
+
+	for (recent_offset_idx = 0;
+	     recent_offset_idx < LZMS_NUM_RECENT_OFFSETS;
+	     recent_offset_idx++)
+		if (offset == state->lru.recent_offsets[recent_offset_idx])
+			break;
+
+	if (recent_offset_idx == LZMS_NUM_RECENT_OFFSETS) {
+		/* Explicit offset. */
+		cost += lzms_rc_bit_cost(&ctx->lz_match_range_encoder,
+					 &state->lz_match_state, 0);
+
+		cost += lzms_offset_cost(&ctx->lz_offset_encoder, offset);
+	} else {
+		int i;
+
+		/* Recent offset. */
+		cost += lzms_rc_bit_cost(&ctx->lz_match_range_encoder,
+					 &state->lz_match_state, 1);
+
+		for (i = 0; i < recent_offset_idx; i++)
+			cost += lzms_rc_bit_cost(&ctx->lz_repeat_match_range_encoders[i],
+						 &state->lz_repeat_match_state[i], 0);
+
+		if (i < LZMS_NUM_RECENT_OFFSETS - 1)
+			cost += lzms_rc_bit_cost(&ctx->lz_repeat_match_range_encoders[i],
+						 &state->lz_repeat_match_state[i], 1);
+
+		/* Initial update of the LZ match offset LRU queue. */
+		for (; i < LZMS_NUM_RECENT_OFFSETS; i++)
+			state->lru.recent_offsets[i] = state->lru.recent_offsets[i + 1];
+	}
+
+	cost += lzms_length_cost(&ctx->length_encoder, length);
+
+	state->lru.upcoming_offset = offset;
+	lzms_update_lz_lru_queues(&state->lru);
+
+	return cost;
 }
 
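Note that both cost functions mutate the `struct lzms_adaptive_state' they are handed, never the real encoders in `ctx': the parser explores candidates against a snapshot, and the true range-encoder states only advance when lzms_encode_literal()/lzms_encode_lz_match() actually emit something. A sketch of the idea (the local variables here are hypothetical):

	struct lzms_adaptive_state trial = snapshot;	/* copy, see below */
	u32 cost = lzms_get_lz_match_cost(ctx, &trial, length, offset);
	/* Only `trial' was advanced; ctx->main_range_encoder.state and
	 * friends are untouched, so another candidate can be costed from
	 * the same snapshot. */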
+ */ static void -lzms_slow_encode(struct lzms_compressor *ctx) +lzms_encode(struct lzms_compressor *ctx) { - struct lzms_match match; + struct raw_match match; + + /* Load window into suffix array match-finder. */ + lz_sarray_load_window(&ctx->lz_sarray, ctx->window, ctx->window_size); + + /* Reset the match-chooser. */ + lz_match_chooser_begin(&ctx->mc); - /* TODO */ while (ctx->cur_window_pos != ctx->window_size) { - match = lzms_get_best_match(ctx); - if (match.length == 0) { - /* Literal */ + match = lzms_get_near_optimal_match(ctx); + if (match.len <= 1) lzms_encode_literal(ctx, ctx->window[ctx->cur_window_pos]); - } else { - /* LZ match */ - lzms_encode_lz_match(ctx, match.length, match.offset); - } + else + lzms_encode_lz_match(ctx, match.len, match.offset); } } -#endif static void lzms_init_range_encoder(struct lzms_range_encoder *enc, @@ -689,17 +917,21 @@ lzms_init_range_encoder(struct lzms_range_encoder *enc, static void lzms_init_huffman_encoder(struct lzms_huffman_encoder *enc, struct lzms_output_bitstream *os, - const u32 *slot_base_tab, unsigned num_syms, unsigned rebuild_freq) { enc->os = os; - enc->slot_base_tab = slot_base_tab; - enc->num_syms_written = rebuild_freq; + enc->num_syms_written = 0; enc->rebuild_freq = rebuild_freq; enc->num_syms = num_syms; for (unsigned i = 0; i < num_syms; i++) enc->sym_freqs[i] = 1; + + make_canonical_huffman_code(enc->num_syms, + LZMS_MAX_CODEWORD_LEN, + enc->sym_freqs, + enc->lens, + enc->codewords); } /* Initialize the LZMS compressor. */ @@ -711,7 +943,6 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen, /* Copy the uncompressed data into the @ctx->window buffer. */ memcpy(ctx->window, udata, ulen); - memset(&ctx->window[ulen], 0, 8); ctx->cur_window_pos = 0; ctx->window_size = ulen; @@ -722,9 +953,6 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen, * (writing backwards). */ lzms_output_bitstream_init(&ctx->os, cdata, clen16); - /* Initialize position and length slot bases if not done already. */ - lzms_init_slot_bases(); - /* Calculate the number of position slots needed for this compressed * block. */ num_position_slots = lzms_get_position_slot(ulen - 1) + 1; @@ -734,23 +962,23 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen, /* Initialize Huffman encoders for each alphabet used in the compressed * representation. */ lzms_init_huffman_encoder(&ctx->literal_encoder, &ctx->os, - NULL, LZMS_NUM_LITERAL_SYMS, + LZMS_NUM_LITERAL_SYMS, LZMS_LITERAL_CODE_REBUILD_FREQ); lzms_init_huffman_encoder(&ctx->lz_offset_encoder, &ctx->os, - lzms_position_slot_base, num_position_slots, + num_position_slots, LZMS_LZ_OFFSET_CODE_REBUILD_FREQ); lzms_init_huffman_encoder(&ctx->length_encoder, &ctx->os, - lzms_length_slot_base, LZMS_NUM_LEN_SYMS, + LZMS_NUM_LEN_SYMS, LZMS_LENGTH_CODE_REBUILD_FREQ); lzms_init_huffman_encoder(&ctx->delta_offset_encoder, &ctx->os, - lzms_position_slot_base, num_position_slots, + num_position_slots, LZMS_DELTA_OFFSET_CODE_REBUILD_FREQ); lzms_init_huffman_encoder(&ctx->delta_power_encoder, &ctx->os, - NULL, LZMS_NUM_DELTA_POWER_SYMS, + LZMS_NUM_DELTA_POWER_SYMS, LZMS_DELTA_POWER_CODE_REBUILD_FREQ); /* Initialize range encoders, all of which wrap around the same @@ -775,20 +1003,8 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen, lzms_init_range_encoder(&ctx->delta_repeat_match_range_encoders[i], &ctx->rc, LZMS_NUM_DELTA_REPEAT_MATCH_STATES); - /* Initialize the LRU queue for recent match offsets. 
 /* Initialize the LZMS compressor. */
@@ -711,7 +943,6 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen,
 
 	/* Copy the uncompressed data into the @ctx->window buffer. */
 	memcpy(ctx->window, udata, ulen);
-	memset(&ctx->window[ulen], 0, 8);
 
 	ctx->cur_window_pos = 0;
 	ctx->window_size = ulen;
@@ -722,9 +953,6 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen,
 	 * (writing backwards). */
 	lzms_output_bitstream_init(&ctx->os, cdata, clen16);
 
-	/* Initialize position and length slot bases if not done already. */
-	lzms_init_slot_bases();
-
 	/* Calculate the number of position slots needed for this compressed
 	 * block. */
 	num_position_slots = lzms_get_position_slot(ulen - 1) + 1;
@@ -734,23 +962,23 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen,
 	/* Initialize Huffman encoders for each alphabet used in the compressed
 	 * representation. */
 	lzms_init_huffman_encoder(&ctx->literal_encoder, &ctx->os,
-				  NULL, LZMS_NUM_LITERAL_SYMS,
+				  LZMS_NUM_LITERAL_SYMS,
 				  LZMS_LITERAL_CODE_REBUILD_FREQ);
 
 	lzms_init_huffman_encoder(&ctx->lz_offset_encoder, &ctx->os,
-				  lzms_position_slot_base, num_position_slots,
+				  num_position_slots,
 				  LZMS_LZ_OFFSET_CODE_REBUILD_FREQ);
 
 	lzms_init_huffman_encoder(&ctx->length_encoder, &ctx->os,
-				  lzms_length_slot_base, LZMS_NUM_LEN_SYMS,
+				  LZMS_NUM_LEN_SYMS,
 				  LZMS_LENGTH_CODE_REBUILD_FREQ);
 
 	lzms_init_huffman_encoder(&ctx->delta_offset_encoder, &ctx->os,
-				  lzms_position_slot_base, num_position_slots,
+				  num_position_slots,
 				  LZMS_DELTA_OFFSET_CODE_REBUILD_FREQ);
 
 	lzms_init_huffman_encoder(&ctx->delta_power_encoder, &ctx->os,
-				  NULL, LZMS_NUM_DELTA_POWER_SYMS,
+				  LZMS_NUM_DELTA_POWER_SYMS,
 				  LZMS_DELTA_POWER_CODE_REBUILD_FREQ);
 
 	/* Initialize range encoders, all of which wrap around the same
@@ -775,20 +1003,8 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen,
 		lzms_init_range_encoder(&ctx->delta_repeat_match_range_encoders[i],
 					&ctx->rc, LZMS_NUM_DELTA_REPEAT_MATCH_STATES);
 
-	/* Initialize the LRU queue for recent match offsets. */
-	for (size_t i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++)
-		ctx->recent_lz_offsets[i] = i + 1;
-
-	for (size_t i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) {
-		ctx->recent_delta_powers[i] = 0;
-		ctx->recent_delta_offsets[i] = i + 1;
-	}
-	ctx->prev_lz_offset = 0;
-	ctx->prev_delta_offset = 0;
-	ctx->prev_delta_power = 0;
-	ctx->upcoming_lz_offset = 0;
-	ctx->upcoming_delta_offset = 0;
-	ctx->upcoming_delta_power = 0;
+	/* Initialize LRU match information. */
+	lzms_init_lru_queues(&ctx->lru);
 }
 
 /* Flush the output streams, prepare the final compressed data, and return its
@@ -822,8 +1038,8 @@ lzms_finalize(struct lzms_compressor *ctx, u8 *cdata, size_t csize_avail)
 
 	/* Now the compressed buffer contains the data output by the forwards
 	 * bitstream, then empty space, then data output by the backwards
-	 * bitstream. Move the data output by the forwards bitstream to be
-	 * adjacent to the data output by the backwards bitstream, and calculate
+	 * bitstream. Move the data output by the backwards bitstream to be
+	 * adjacent to the data output by the forwards bitstream, and calculate
 	 * the compressed size that this results in. */
 	num_forwards_bytes = (u8*)ctx->rc.out - (u8*)cdata;
 	num_backwards_bytes = ((u8*)cdata + csize_avail) - (u8*)ctx->os.out;
@@ -834,7 +1050,7 @@ lzms_finalize(struct lzms_compressor *ctx, u8 *cdata, size_t csize_avail)
 	LZMS_DEBUG("num_forwards_bytes=%zu, num_backwards_bytes=%zu, "
 		   "compressed_size=%zu",
 		   num_forwards_bytes, num_backwards_bytes, compressed_size);
-	LZMS_ASSERT(!(compressed_size & 1));
+	LZMS_ASSERT(compressed_size % 2 == 0);
 	return compressed_size;
 }
 
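The finalize step leaves the output buffer in the state sketched below; closing the gap is then a single move of the backwards-stream bytes. The memmove() shown is an assumption about the code elided between the two hunks above, not a quote of it:

	/*
	 *   cdata                                        cdata + csize_avail
	 *   |<- num_forwards_bytes ->|<---- gap ---->|<- num_backwards_bytes ->|
	 *   [ range encoder output   ][    unused    ][ output bitstream data  ]
	 */
	memmove(cdata + num_forwards_bytes, ctx->os.out, num_backwards_bytes);
	/* The destination never starts past the source (forwards + backwards
	 * bytes fit in csize_avail), and memmove() tolerates overlap anyway. */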
@@ -858,10 +1074,12 @@ lzms_compress(const void *uncompressed_data, size_t uncompressed_size,
 	}
 
 	/* Don't bother compressing extremely small inputs. */
-	if (uncompressed_size < 4)
+	if (uncompressed_size < 4) {
+		LZMS_DEBUG("Input too small to bother compressing.");
 		return 0;
+	}
 
-	/* Cap the available compressed size to a 32-bit integer, and round it
+	/* Cap the available compressed size to a 32-bit integer and round it
 	 * down to the nearest multiple of 2. */
 	if (compressed_size_avail > UINT32_MAX)
 		compressed_size_avail = UINT32_MAX;
@@ -876,9 +1094,9 @@ lzms_compress(const void *uncompressed_data, size_t uncompressed_size,
 	lzms_x86_filter(ctx->window, ctx->window_size,
 			ctx->last_target_usages, false);
 
-	/* Determine and output a literal/match sequence that decompresses to
-	 * the preprocessed data. */
-	lzms_fast_encode(ctx);
+	/* Compute and encode a literal/match sequence that decompresses to the
+	 * preprocessed data. */
+	lzms_encode(ctx);
 
 	/* Get and return the compressed data size. */
 	compressed_size = lzms_finalize(ctx, compressed_data,
@@ -943,17 +1161,49 @@ lzms_free_compressor(void *_ctx)
 
 	if (ctx) {
 		FREE(ctx->window);
-		FREE(ctx->prev_tab);
+		FREE(ctx->matches);
+		lz_sarray_destroy(&ctx->lz_sarray);
+		lz_match_chooser_destroy(&ctx->mc);
 		FREE(ctx);
 	}
 }
 
+static const struct wimlib_lzms_compressor_params lzms_default = {
+	.hdr = {
+		.size = sizeof(struct wimlib_lzms_compressor_params),
+	},
+	.min_match_length = 2,
+	.max_match_length = UINT32_MAX,
+	.nice_match_length = 32,
+	.max_search_depth = 50,
+	.max_matches_per_pos = 3,
+	.optim_array_length = 1024,
+};
+
+static bool
+lzms_params_valid(const struct wimlib_compressor_params_header *);
+
+static const struct wimlib_lzms_compressor_params *
+lzms_get_params(const struct wimlib_compressor_params_header *_params)
+{
+	const struct wimlib_lzms_compressor_params *params =
+		(const struct wimlib_lzms_compressor_params*)_params;
+
+	if (params == NULL)
+		params = &lzms_default;
+
+	LZMS_ASSERT(lzms_params_valid(&params->hdr));
+
+	return params;
+}
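Calling code can override these defaults by passing its own parameter block; only hdr.size is mandatory, and the block must satisfy lzms_params_valid() below. A sketch with arbitrary example values:

	struct wimlib_lzms_compressor_params params = {
		.hdr = { .size = sizeof(params) },	/* required */
		.min_match_length = 2,
		.max_match_length = UINT32_MAX,
		.nice_match_length = 64,	/* search harder than default */
		.max_search_depth = 100,
		.max_matches_per_pos = 10,
		.optim_array_length = 4096,	/* longer parse runs, more memory */
	};
	/* Passing NULL instead selects lzms_default, exactly as
	 * lzms_get_params() falls back above. */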
+
 static int
 lzms_create_compressor(size_t max_block_size,
-		       const struct wimlib_compressor_params_header *params,
+		       const struct wimlib_compressor_params_header *_params,
 		       void **ctx_ret)
 {
 	struct lzms_compressor *ctx;
+	const struct wimlib_lzms_compressor_params *params = lzms_get_params(_params);
 
 	if (max_block_size == 0 || max_block_size >= INT32_MAX) {
 		LZMS_DEBUG("Invalid max_block_size (%u)", max_block_size);
@@ -964,14 +1214,36 @@ lzms_create_compressor(size_t max_block_size,
 	if (ctx == NULL)
 		goto oom;
 
-	ctx->window = MALLOC(max_block_size + 8);
+	ctx->window = MALLOC(max_block_size);
 	if (ctx->window == NULL)
 		goto oom;
 
-	ctx->prev_tab = MALLOC(max_block_size * sizeof(ctx->prev_tab[0]));
-	if (ctx->prev_tab == NULL)
+	ctx->matches = MALLOC(min(params->max_match_length -
+				  params->min_match_length + 1,
+				  params->max_matches_per_pos) *
+			      sizeof(ctx->matches[0]));
+	if (ctx->matches == NULL)
 		goto oom;
 
+	if (!lz_sarray_init(&ctx->lz_sarray, max_block_size,
+			    params->min_match_length,
+			    min(params->max_match_length, LZ_SARRAY_LEN_MAX),
+			    params->max_search_depth,
+			    params->max_matches_per_pos))
+		goto oom;
+
+	if (!lz_match_chooser_init(&ctx->mc,
+				   params->optim_array_length,
+				   params->nice_match_length,
+				   params->max_match_length))
+		goto oom;
+
+	/* Initialize position and length slot data if not done already. */
+	lzms_init_slots();
+
+	/* Initialize range encoding cost table if not done already. */
+	lzms_init_rc_costs();
+
 	ctx->max_block_size = max_block_size;
 
 	*ctx_ret = ctx;
@@ -982,7 +1254,46 @@ oom:
 	return WIMLIB_ERR_NOMEM;
 }
 
+static u64
+lzms_get_needed_memory(size_t max_block_size,
+		       const struct wimlib_compressor_params_header *_params)
+{
+	const struct wimlib_lzms_compressor_params *params = lzms_get_params(_params);
+
+	u64 size = 0;
+
+	size += max_block_size;
+	size += sizeof(struct lzms_compressor);
+	size += lz_sarray_get_needed_memory(max_block_size);
+	size += lz_match_chooser_get_needed_memory(params->optim_array_length,
+						   params->nice_match_length,
+						   params->max_match_length);
+	size += min(params->max_match_length -
+		    params->min_match_length + 1,
+		    params->max_matches_per_pos) *
+		sizeof(((struct lzms_compressor*)0)->matches[0]);
+	return size;
+}
+
+static bool
+lzms_params_valid(const struct wimlib_compressor_params_header *_params)
+{
+	const struct wimlib_lzms_compressor_params *params =
+		(const struct wimlib_lzms_compressor_params*)_params;
+
+	if (params->hdr.size != sizeof(*params) ||
+	    params->max_match_length < params->min_match_length ||
+	    params->min_match_length < 2 ||
+	    params->optim_array_length == 0 ||
+	    min(params->max_match_length, params->nice_match_length) > 65536)
+		return false;
+
+	return true;
+}
+
 const struct compressor_ops lzms_compressor_ops = {
+	.params_valid = lzms_params_valid,
+	.get_needed_memory = lzms_get_needed_memory,
 	.create_compressor = lzms_create_compressor,
 	.compress = lzms_compress,
 	.free_compressor = lzms_free_compressor,
+};