X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzx-compress.c;h=cdbb63699004edbed7066f9344f6d1c6f30207d2;hp=aedd05a86e8a64d78430f781f79a045998fce8fb;hb=10be6c712f4f5f066458dbab11e81f8e64cd2324;hpb=14a1547f3404346127a82e643809bc837425ec8e diff --git a/src/lzx-compress.c b/src/lzx-compress.c index aedd05a8..cdbb6369 100644 --- a/src/lzx-compress.c +++ b/src/lzx-compress.c @@ -254,7 +254,7 @@ #define LZX_CACHE_PER_POS 8 #define LZX_CACHE_LEN (LZX_DIV_BLOCK_SIZE * (LZX_CACHE_PER_POS + 1)) -#define LZX_CACHE_SIZE (LZX_CACHE_LEN * sizeof(struct raw_match)) +#define LZX_CACHE_SIZE (LZX_CACHE_LEN * sizeof(struct lz_match)) #define LZX_MAX_MATCHES_PER_POS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1) /* Codewords for the LZX main, length, and aligned offset Huffman codes */ @@ -279,7 +279,7 @@ struct lzx_lens { * * If a codeword has zero frequency, it must still be assigned some nonzero cost * --- generally a high cost, since even if it gets used in the next iteration, - * it probably will not be used very times. */ + * it probably will not be used very many times. */ struct lzx_costs { u8 main[LZX_MAINCODE_MAX_NUM_SYMBOLS]; u8 len[LZX_LENCODE_NUM_SYMBOLS]; @@ -294,13 +294,13 @@ struct lzx_codes { /* Tables for tallying symbol frequencies in the three LZX alphabets */ struct lzx_freqs { - input_idx_t main[LZX_MAINCODE_MAX_NUM_SYMBOLS]; - input_idx_t len[LZX_LENCODE_NUM_SYMBOLS]; - input_idx_t aligned[LZX_ALIGNEDCODE_NUM_SYMBOLS]; + u32 main[LZX_MAINCODE_MAX_NUM_SYMBOLS]; + u32 len[LZX_LENCODE_NUM_SYMBOLS]; + u32 aligned[LZX_ALIGNEDCODE_NUM_SYMBOLS]; }; /* LZX intermediate match/literal format */ -struct lzx_match { +struct lzx_item { /* Bit Description * * 31 1 if a match, 0 if a literal. @@ -325,16 +325,16 @@ struct lzx_block_spec { int block_type; /* 0-based position in the window at which this block starts. */ - input_idx_t window_pos; + u32 window_pos; /* The number of bytes of uncompressed data this block represents. */ - input_idx_t block_size; + u32 block_size; /* The match/literal sequence for this block. */ - struct lzx_match *chosen_matches; + struct lzx_item *chosen_items; - /* The length of the @chosen_matches sequence. */ - input_idx_t num_chosen_matches; + /* The length of the @chosen_items sequence. */ + u32 num_chosen_items; /* Huffman codes for this block. */ struct lzx_codes codes; @@ -363,10 +363,10 @@ struct lzx_compressor { /* Number of bytes of data to be compressed, which is the number of * bytes of data in @window that are actually valid. */ - input_idx_t window_size; + u32 window_size; /* Allocated size of the @window. */ - input_idx_t max_window_size; + u32 max_window_size; /* Number of symbols in the main alphabet (depends on the * @max_window_size since it determines the maximum allowed offset). */ @@ -377,7 +377,7 @@ struct lzx_compressor { /* Space for the sequences of matches/literals that were chosen for each * block. */ - struct lzx_match *chosen_matches; + struct lzx_item *chosen_items; /* Information about the LZX blocks the preprocessed input was divided * into. */ @@ -397,27 +397,27 @@ struct lzx_compressor { struct lzx_costs costs; /* Fast algorithm only: Array of hash table links. */ - input_idx_t *prev_tab; + u32 *prev_tab; /* Slow algorithm only: Binary tree match-finder. */ struct lz_bt mf; /* Position in window of next match to return. */ - input_idx_t match_window_pos; + u32 match_window_pos; /* The end-of-block position. We can't allow any matches to span this * position. 
*/ - input_idx_t match_window_end; + u32 match_window_end; /* Matches found by the match-finder are cached in the following array * to achieve a slight speedup when the same matches are needed on * subsequent passes. This is suboptimal because different matches may * be preferred with different cost models, but seems to be a worthwhile * speedup. */ - struct raw_match *cached_matches; - struct raw_match *cache_ptr; + struct lz_match *cached_matches; + struct lz_match *cache_ptr; bool matches_cached; - struct raw_match *cache_limit; + struct lz_match *cache_limit; /* Match-chooser state. * When matches have been chosen, optimum_cur_idx is set to the position @@ -453,22 +453,22 @@ struct lzx_mc_pos_data { /* Position of the start of the match or literal that * was taken to get to this position in the approximate * minimum-cost parse. */ - input_idx_t link; + u32 link; /* Offset (as in an LZ (length, offset) pair) of the * match or literal that was taken to get to this * position in the approximate minimum-cost parse. */ - input_idx_t match_offset; + u32 match_offset; } prev; struct { /* Position at which the match or literal starting at * this position ends in the minimum-cost parse. */ - input_idx_t link; + u32 link; /* Offset (as in an LZ (length, offset) pair) of the * match or literal starting at this position in the * approximate minimum-cost parse. */ - input_idx_t match_offset; + u32 match_offset; } next; }; @@ -557,7 +557,7 @@ lzx_make_huffman_codes(const struct lzx_freqs *freqs, */ static void lzx_write_match(struct output_bitstream *out, int block_type, - struct lzx_match match, const struct lzx_codes *codes) + struct lzx_item match, const struct lzx_codes *codes) { /* low 8 bits are the match length minus 2 */ unsigned match_len_minus_2 = match.data & 0xff; @@ -643,7 +643,7 @@ static unsigned lzx_build_precode(const u8 lens[restrict], const u8 prev_lens[restrict], const unsigned num_syms, - input_idx_t precode_freqs[restrict LZX_PRECODE_NUM_SYMBOLS], + u32 precode_freqs[restrict LZX_PRECODE_NUM_SYMBOLS], u8 output_syms[restrict num_syms], u8 precode_lens[restrict LZX_PRECODE_NUM_SYMBOLS], u32 precode_codewords[restrict LZX_PRECODE_NUM_SYMBOLS], @@ -812,7 +812,7 @@ lzx_write_compressed_code(struct output_bitstream *out, const u8 prev_lens[restrict], unsigned num_syms) { - input_idx_t precode_freqs[LZX_PRECODE_NUM_SYMBOLS]; + u32 precode_freqs[LZX_PRECODE_NUM_SYMBOLS]; u8 output_syms[num_syms]; u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS]; u32 precode_codewords[LZX_PRECODE_NUM_SYMBOLS]; @@ -883,12 +883,12 @@ lzx_write_compressed_code(struct output_bitstream *out, static void lzx_write_matches_and_literals(struct output_bitstream *ostream, int block_type, - const struct lzx_match match_tab[], + const struct lzx_item match_tab[], unsigned match_count, const struct lzx_codes *codes) { for (unsigned i = 0; i < match_count; i++) { - struct lzx_match match = match_tab[i]; + struct lzx_item match = match_tab[i]; /* The high bit of the 32-bit intermediate representation * indicates whether the item is an actual LZ-style match (1) or @@ -943,8 +943,8 @@ lzx_write_compressed_block(int block_type, unsigned block_size, unsigned max_window_size, unsigned num_main_syms, - struct lzx_match * chosen_matches, - unsigned num_chosen_matches, + struct lzx_item * chosen_items, + unsigned num_chosen_items, const struct lzx_codes * codes, const struct lzx_codes * prev_codes, struct output_bitstream * ostream) @@ -1022,7 +1022,7 @@ lzx_write_compressed_block(int block_type, /* Write the actual matches and 
literals. */ lzx_write_matches_and_literals(ostream, block_type, - chosen_matches, num_chosen_matches, + chosen_items, num_chosen_items, codes); LZX_DEBUG("Done writing block."); @@ -1037,17 +1037,17 @@ lzx_write_all_blocks(struct lzx_compressor *ctx, struct output_bitstream *ostrea for (unsigned i = 0; i < ctx->num_blocks; i++) { const struct lzx_block_spec *spec = &ctx->block_specs[i]; - LZX_DEBUG("Writing block %u/%u (type=%d, size=%u, num_chosen_matches=%u)...", + LZX_DEBUG("Writing block %u/%u (type=%d, size=%u, num_chosen_items=%u)...", i + 1, ctx->num_blocks, spec->block_type, spec->block_size, - spec->num_chosen_matches); + spec->num_chosen_items); lzx_write_compressed_block(spec->block_type, spec->block_size, ctx->max_window_size, ctx->num_main_syms, - spec->chosen_matches, - spec->num_chosen_matches, + spec->chosen_items, + spec->num_chosen_items, &spec->codes, prev_codes, ostream); @@ -1058,7 +1058,7 @@ lzx_write_all_blocks(struct lzx_compressor *ctx, struct output_bitstream *ostrea /* Constructs an LZX match from a literal byte and updates the main code symbol * frequencies. */ -static u32 +static inline u32 lzx_tally_literal(u8 lit, struct lzx_freqs *freqs) { freqs->main[lit]++; @@ -1069,7 +1069,7 @@ lzx_tally_literal(u8 lit, struct lzx_freqs *freqs) * queue and the frequency of symbols in the main, length, and aligned offset * alphabets. The return value is a 32-bit number that provides the match in an * intermediate representation documented below. */ -static u32 +static inline u32 lzx_tally_match(unsigned match_len, u32 match_offset, struct lzx_freqs *freqs, struct lzx_lru_queue *queue) { @@ -1121,7 +1121,7 @@ lzx_tally_match(unsigned match_len, u32 match_offset, freqs->aligned[position_footer & 7]++; /* Pack the position slot, position footer, and match length into an - * intermediate representation. See `struct lzx_match' for details. + * intermediate representation. See `struct lzx_item' for details. */ LZX_ASSERT(LZX_MAX_POSITION_SLOTS <= 64); LZX_ASSERT(lzx_get_num_extra_bits(LZX_MAX_POSITION_SLOTS - 1) <= 17); @@ -1139,7 +1139,7 @@ lzx_tally_match(unsigned match_len, u32 match_offset, struct lzx_record_ctx { struct lzx_freqs freqs; struct lzx_lru_queue queue; - struct lzx_match *matches; + struct lzx_item *matches; }; static void @@ -1248,10 +1248,10 @@ lzx_set_costs(struct lzx_compressor * ctx, const struct lzx_lens * lens) * value is the number of matches found. */ static unsigned lzx_get_matches(struct lzx_compressor *ctx, - const struct raw_match **matches_ret) + const struct lz_match **matches_ret) { - struct raw_match *cache_ptr; - struct raw_match *matches; + struct lz_match *cache_ptr; + struct lz_match *matches; unsigned num_matches; LZX_ASSERT(ctx->match_window_pos < ctx->match_window_end); @@ -1324,7 +1324,7 @@ lzx_get_matches(struct lzx_compressor *ctx, static void lzx_skip_bytes(struct lzx_compressor *ctx, unsigned n) { - struct raw_match *cache_ptr; + struct lz_match *cache_ptr; LZX_ASSERT(n <= ctx->match_window_end - ctx->match_window_pos); @@ -1349,7 +1349,7 @@ lzx_skip_bytes(struct lzx_compressor *ctx, unsigned n) * * Returns the first match in the list. 
*/ -static struct raw_match +static struct lz_match lzx_match_chooser_reverse_list(struct lzx_compressor *ctx, unsigned cur_pos) { unsigned prev_link, saved_prev_link; @@ -1375,7 +1375,7 @@ lzx_match_chooser_reverse_list(struct lzx_compressor *ctx, unsigned cur_pos) ctx->optimum_cur_idx = ctx->optimum[0].next.link; - return (struct raw_match) + return (struct lz_match) { .len = ctx->optimum_cur_idx, .offset = ctx->optimum[0].next.match_offset, }; @@ -1443,12 +1443,12 @@ lzx_match_chooser_reverse_list(struct lzx_compressor *ctx, unsigned cur_pos) * The return value is a (length, offset) pair specifying the match or literal * chosen. For literals, the length is 0 or 1 and the offset is meaningless. */ -static struct raw_match +static struct lz_match lzx_get_near_optimal_match(struct lzx_compressor *ctx) { unsigned num_matches; - const struct raw_match *matches; - struct raw_match match; + const struct lz_match *matches; + struct lz_match match; unsigned longest_len; unsigned longest_rep_len; u32 longest_rep_offset; @@ -1493,7 +1493,7 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx) /* If there's a long match with a recent offset, take it. */ if (longest_rep_len >= ctx->params.alg_params.slow.nice_match_length) { lzx_skip_bytes(ctx, longest_rep_len); - return (struct raw_match) { + return (struct lz_match) { .len = longest_rep_len, .offset = longest_rep_offset, }; @@ -1857,61 +1857,82 @@ lzx_optimize_block(struct lzx_compressor *ctx, struct lzx_block_spec *spec, const struct lzx_lru_queue orig_queue = ctx->queue; unsigned num_passes_remaining = num_passes; struct lzx_freqs freqs; + const u8 *window_ptr; + const u8 *window_end; + struct lzx_item *next_chosen_match; + struct lz_match lz_match; + struct lzx_item lzx_item; LZX_ASSERT(num_passes >= 1); LZX_ASSERT(lz_bt_get_position(&ctx->mf) == spec->window_pos); ctx->match_window_end = spec->window_pos + spec->block_size; - spec->chosen_matches = &ctx->chosen_matches[spec->window_pos]; ctx->matches_cached = false; /* The first optimal parsing pass is done using the cost model already * set in ctx->costs. Each later pass is done using a cost model - * computed from the previous pass. */ - do { - const u8 *window_ptr; - const u8 *window_end; - struct lzx_match *next_chosen_match; + * computed from the previous pass. + * + * To improve performance we only generate the array containing the + * matches and literals in intermediate form on the final pass. 
*/ - --num_passes_remaining; + while (--num_passes_remaining) { ctx->match_window_pos = spec->window_pos; ctx->cache_ptr = ctx->cached_matches; memset(&freqs, 0, sizeof(freqs)); window_ptr = &ctx->window[spec->window_pos]; window_end = window_ptr + spec->block_size; - next_chosen_match = spec->chosen_matches; while (window_ptr != window_end) { - struct raw_match raw_match; - struct lzx_match lzx_match; - raw_match = lzx_get_near_optimal_match(ctx); + lz_match = lzx_get_near_optimal_match(ctx); - LZX_ASSERT(!(raw_match.len == LZX_MIN_MATCH_LEN && - raw_match.offset == ctx->max_window_size - + LZX_ASSERT(!(lz_match.len == LZX_MIN_MATCH_LEN && + lz_match.offset == ctx->max_window_size - LZX_MIN_MATCH_LEN)); - if (raw_match.len >= LZX_MIN_MATCH_LEN) { - lzx_match.data = lzx_tally_match(raw_match.len, - raw_match.offset, - &freqs, - &ctx->queue); - window_ptr += raw_match.len; + if (lz_match.len >= LZX_MIN_MATCH_LEN) { + lzx_tally_match(lz_match.len, lz_match.offset, + &freqs, &ctx->queue); + window_ptr += lz_match.len; } else { - lzx_match.data = lzx_tally_literal(*window_ptr, - &freqs); + lzx_tally_literal(*window_ptr, &freqs); window_ptr += 1; } - *next_chosen_match++ = lzx_match; } - spec->num_chosen_matches = next_chosen_match - spec->chosen_matches; lzx_make_huffman_codes(&freqs, &spec->codes, ctx->num_main_syms); - if (num_passes_remaining) { - lzx_set_costs(ctx, &spec->codes.lens); - ctx->queue = orig_queue; - ctx->matches_cached = true; - } - } while (num_passes_remaining); + lzx_set_costs(ctx, &spec->codes.lens); + ctx->queue = orig_queue; + ctx->matches_cached = true; + } + ctx->match_window_pos = spec->window_pos; + ctx->cache_ptr = ctx->cached_matches; + memset(&freqs, 0, sizeof(freqs)); + window_ptr = &ctx->window[spec->window_pos]; + window_end = window_ptr + spec->block_size; + + spec->chosen_items = &ctx->chosen_items[spec->window_pos]; + next_chosen_match = spec->chosen_items; + + while (window_ptr != window_end) { + lz_match = lzx_get_near_optimal_match(ctx); + + LZX_ASSERT(!(lz_match.len == LZX_MIN_MATCH_LEN && + lz_match.offset == ctx->max_window_size - + LZX_MIN_MATCH_LEN)); + if (lz_match.len >= LZX_MIN_MATCH_LEN) { + lzx_item.data = lzx_tally_match(lz_match.len, + lz_match.offset, + &freqs, &ctx->queue); + window_ptr += lz_match.len; + } else { + lzx_item.data = lzx_tally_literal(*window_ptr, &freqs); + window_ptr += 1; + } + *next_chosen_match++ = lzx_item; + } + spec->num_chosen_items = next_chosen_match - spec->chosen_items; + lzx_make_huffman_codes(&freqs, &spec->codes, ctx->num_main_syms); spec->block_type = lzx_choose_verbatim_or_aligned(&freqs, &spec->codes); } @@ -1962,7 +1983,7 @@ lzx_prepare_blocks(struct lzx_compressor * ctx) * * ctx->block_specs[] * ctx->num_blocks - * ctx->chosen_matches[] + * ctx->chosen_items[] */ static void lzx_prepare_block_fast(struct lzx_compressor * ctx) @@ -1988,7 +2009,7 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx) /* Initialize symbol frequencies and match offset LRU queue. */ memset(&record_ctx.freqs, 0, sizeof(struct lzx_freqs)); lzx_lru_queue_init(&record_ctx.queue); - record_ctx.matches = ctx->chosen_matches; + record_ctx.matches = ctx->chosen_items; /* Determine series of matches/literals to output. 
*/ lz_analyze_block(ctx->window, @@ -2004,8 +2025,8 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx) spec->block_type = LZX_BLOCKTYPE_ALIGNED; spec->window_pos = 0; spec->block_size = ctx->window_size; - spec->num_chosen_matches = (record_ctx.matches - ctx->chosen_matches); - spec->chosen_matches = ctx->chosen_matches; + spec->num_chosen_items = (record_ctx.matches - ctx->chosen_items); + spec->chosen_items = ctx->chosen_items; lzx_make_huffman_codes(&record_ctx.freqs, &spec->codes, ctx->num_main_syms); ctx->num_blocks = 1; @@ -2065,7 +2086,7 @@ lzx_compress(const void *uncompressed_data, size_t uncompressed_size, LZX_DEBUG("Flushing bitstream..."); compressed_size = flush_output_bitstream(&ostream); - if (compressed_size == ~(input_idx_t)0) { + if (compressed_size == (u32)~0UL) { LZX_DEBUG("Data did not compress to %zu bytes or less!", compressed_size_avail); return 0; @@ -2125,7 +2146,7 @@ lzx_free_compressor(void *_ctx) struct lzx_compressor *ctx = _ctx; if (ctx) { - FREE(ctx->chosen_matches); + FREE(ctx->chosen_items); FREE(ctx->cached_matches); FREE(ctx->optimum); lz_bt_destroy(&ctx->mf); @@ -2199,8 +2220,6 @@ lzx_create_compressor(size_t window_size, if (!lzx_window_size_valid(window_size)) return WIMLIB_ERR_INVALID_PARAM; - LZX_DEBUG("Allocating memory."); - ctx = CALLOC(1, sizeof(struct lzx_compressor)); if (ctx == NULL) goto oom; @@ -2241,7 +2260,7 @@ lzx_create_compressor(size_t window_size, min(params->alg_params.slow.nice_match_length, LZX_MAX_MATCH_LEN)) * sizeof(ctx->optimum[0])); - if (!ctx->optimum) + if (ctx->optimum == NULL) goto oom; } @@ -2253,8 +2272,8 @@ lzx_create_compressor(size_t window_size, LZX_CACHE_LEN - (LZX_MAX_MATCHES_PER_POS + 1); } - ctx->chosen_matches = MALLOC(window_size * sizeof(ctx->chosen_matches[0])); - if (ctx->chosen_matches == NULL) + ctx->chosen_items = MALLOC(window_size * sizeof(ctx->chosen_items[0])); + if (ctx->chosen_items == NULL) goto oom; memcpy(&ctx->params, params, sizeof(struct wimlib_lzx_compressor_params)); @@ -2286,7 +2305,7 @@ lzx_get_needed_memory(size_t max_block_size, sizeof(((struct lzx_compressor*)0)->block_specs[0]); if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) { - size += max_block_size * sizeof(((struct lzx_compressor*)0)->chosen_matches[0]); + size += max_block_size * sizeof(((struct lzx_compressor*)0)->chosen_items[0]); size += lz_bt_get_needed_memory(max_block_size); size += (LZX_OPTIM_ARRAY_SIZE + min(params->alg_params.slow.nice_match_length,
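
Note on the lzx_optimize_block() restructuring above: the per-block loop is now split so that the first num_passes - 1 iterations only tally symbol frequencies (to refine the cost model), and the intermediate match/literal array is generated on the final pass only. What follows is a minimal, self-contained sketch of that control flow, not code from wimlib: struct item, struct freqs, choose_next_item(), update_costs(), optimize_block_sketch() and main() are hypothetical stand-ins, and the parser is reduced to emitting literals so the sketch compiles and runs on its own.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical stand-ins for struct lzx_item and struct lzx_freqs. */
struct item  { uint32_t data; };
struct freqs { uint32_t byte[256]; };

/* Consume one item starting at *window_ptr, tally it in @freqs, and return it
 * in intermediate form.  Stands in for lzx_get_near_optimal_match() plus
 * lzx_tally_match()/lzx_tally_literal(); reduced to literals only here so the
 * sketch stays self-contained. */
static struct item
choose_next_item(const uint8_t **window_ptr, struct freqs *freqs)
{
	uint8_t lit = *(*window_ptr)++;

	freqs->byte[lit]++;
	return (struct item){ .data = lit };
}

/* Rebuild the cost model from the tallied frequencies.  Stands in for
 * lzx_make_huffman_codes() + lzx_set_costs(); a no-op in this sketch. */
static void
update_costs(const struct freqs *freqs)
{
	(void)freqs;
}

/* Mirrors the reorganized lzx_optimize_block(): statistics-only passes first,
 * then one final pass that also records the chosen items.  Assumes
 * num_passes >= 1, as the real function asserts. */
static size_t
optimize_block_sketch(const uint8_t *window, size_t block_size,
		      unsigned num_passes, struct item *chosen_items)
{
	const uint8_t *window_end = window + block_size;
	const uint8_t *window_ptr;
	struct item *next_item = chosen_items;
	struct freqs freqs;

	/* Passes 1 .. num_passes - 1: tally frequencies and refine the cost
	 * model, but do not materialize the item array. */
	while (--num_passes) {
		memset(&freqs, 0, sizeof(freqs));
		window_ptr = window;
		while (window_ptr != window_end)
			(void)choose_next_item(&window_ptr, &freqs);
		update_costs(&freqs);
	}

	/* Final pass: tally frequencies *and* record the chosen items. */
	memset(&freqs, 0, sizeof(freqs));
	window_ptr = window;
	while (window_ptr != window_end)
		*next_item++ = choose_next_item(&window_ptr, &freqs);

	return next_item - chosen_items;
}

/* Tiny usage example: two passes over a small buffer; with the literal-only
 * stand-in parser, one item per input byte is recorded on the final pass. */
int main(void)
{
	static const uint8_t data[] = "abracadabra";
	struct item items[sizeof(data)];
	size_t n = optimize_block_sketch(data, sizeof(data), 2, items);

	return n == sizeof(data) ? 0 : 1;
}

The point of the split is that the earlier passes exist only to converge the cost model, so skipping the item stores there avoids redundant work without changing the sequence chosen on the last pass, which is what the comment "To improve performance we only generate the array containing the matches and literals in intermediate form on the final pass" in the patch refers to.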