X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzx-compress.c;h=a51cfb1b15f7d82c3d5bac490beb204ccb3cb13f;hp=190a729303fc041e18b43a51a1ac5e60ed52a37c;hb=41c221c509deed7dc9c2bd8eb8c7e93563b21199;hpb=41772ede0ef29ca61d37539b41dc0c172e54c2ab diff --git a/src/lzx-compress.c b/src/lzx-compress.c index 190a7293..a51cfb1b 100644 --- a/src/lzx-compress.c +++ b/src/lzx-compress.c @@ -254,7 +254,7 @@ #define LZX_CACHE_PER_POS 8 #define LZX_CACHE_LEN (LZX_DIV_BLOCK_SIZE * (LZX_CACHE_PER_POS + 1)) -#define LZX_CACHE_SIZE (LZX_CACHE_LEN * sizeof(struct raw_match)) +#define LZX_CACHE_SIZE (LZX_CACHE_LEN * sizeof(struct lz_match)) #define LZX_MAX_MATCHES_PER_POS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1) /* Codewords for the LZX main, length, and aligned offset Huffman codes */ @@ -279,7 +279,7 @@ struct lzx_lens { * * If a codeword has zero frequency, it must still be assigned some nonzero cost * --- generally a high cost, since even if it gets used in the next iteration, - * it probably will not be used very times. */ + * it probably will not be used very many times. */ struct lzx_costs { u8 main[LZX_MAINCODE_MAX_NUM_SYMBOLS]; u8 len[LZX_LENCODE_NUM_SYMBOLS]; @@ -294,9 +294,9 @@ struct lzx_codes { /* Tables for tallying symbol frequencies in the three LZX alphabets */ struct lzx_freqs { - input_idx_t main[LZX_MAINCODE_MAX_NUM_SYMBOLS]; - input_idx_t len[LZX_LENCODE_NUM_SYMBOLS]; - input_idx_t aligned[LZX_ALIGNEDCODE_NUM_SYMBOLS]; + u32 main[LZX_MAINCODE_MAX_NUM_SYMBOLS]; + u32 len[LZX_LENCODE_NUM_SYMBOLS]; + u32 aligned[LZX_ALIGNEDCODE_NUM_SYMBOLS]; }; /* LZX intermediate match/literal format */ @@ -325,16 +325,16 @@ struct lzx_block_spec { int block_type; /* 0-based position in the window at which this block starts. */ - input_idx_t window_pos; + u32 window_pos; /* The number of bytes of uncompressed data this block represents. */ - input_idx_t block_size; + u32 block_size; /* The match/literal sequence for this block. */ struct lzx_item *chosen_items; /* The length of the @chosen_items sequence. */ - input_idx_t num_chosen_items; + u32 num_chosen_items; /* Huffman codes for this block. */ struct lzx_codes codes; @@ -363,10 +363,10 @@ struct lzx_compressor { /* Number of bytes of data to be compressed, which is the number of * bytes of data in @window that are actually valid. */ - input_idx_t window_size; + u32 window_size; /* Allocated size of the @window. */ - input_idx_t max_window_size; + u32 max_window_size; /* Number of symbols in the main alphabet (depends on the * @max_window_size since it determines the maximum allowed offset). */ @@ -397,27 +397,27 @@ struct lzx_compressor { struct lzx_costs costs; /* Fast algorithm only: Array of hash table links. */ - input_idx_t *prev_tab; + u32 *prev_tab; /* Slow algorithm only: Binary tree match-finder. */ struct lz_bt mf; /* Position in window of next match to return. */ - input_idx_t match_window_pos; + u32 match_window_pos; /* The end-of-block position. We can't allow any matches to span this * position. */ - input_idx_t match_window_end; + u32 match_window_end; /* Matches found by the match-finder are cached in the following array * to achieve a slight speedup when the same matches are needed on * subsequent passes. This is suboptimal because different matches may * be preferred with different cost models, but seems to be a worthwhile * speedup. */ - struct raw_match *cached_matches; - struct raw_match *cache_ptr; + struct lz_match *cached_matches; + struct lz_match *cache_ptr; bool matches_cached; - struct raw_match *cache_limit; + struct lz_match *cache_limit; /* Match-chooser state. * When matches have been chosen, optimum_cur_idx is set to the position @@ -453,22 +453,22 @@ struct lzx_mc_pos_data { /* Position of the start of the match or literal that * was taken to get to this position in the approximate * minimum-cost parse. */ - input_idx_t link; + u32 link; /* Offset (as in an LZ (length, offset) pair) of the * match or literal that was taken to get to this * position in the approximate minimum-cost parse. */ - input_idx_t match_offset; + u32 match_offset; } prev; struct { /* Position at which the match or literal starting at * this position ends in the minimum-cost parse. */ - input_idx_t link; + u32 link; /* Offset (as in an LZ (length, offset) pair) of the * match or literal starting at this position in the * approximate minimum-cost parse. */ - input_idx_t match_offset; + u32 match_offset; } next; }; @@ -550,7 +550,7 @@ lzx_make_huffman_codes(const struct lzx_freqs *freqs, * The type of the LZX block (LZX_BLOCKTYPE_ALIGNED or * LZX_BLOCKTYPE_VERBATIM) * @match: - * The match, as a (length, offset) pair. + * The match data. * @codes: * Pointer to a structure that contains the codewords for the main, length, * and aligned offset Huffman codes for the current LZX compressed block. @@ -643,7 +643,7 @@ static unsigned lzx_build_precode(const u8 lens[restrict], const u8 prev_lens[restrict], const unsigned num_syms, - input_idx_t precode_freqs[restrict LZX_PRECODE_NUM_SYMBOLS], + u32 precode_freqs[restrict LZX_PRECODE_NUM_SYMBOLS], u8 output_syms[restrict num_syms], u8 precode_lens[restrict LZX_PRECODE_NUM_SYMBOLS], u32 precode_codewords[restrict LZX_PRECODE_NUM_SYMBOLS], @@ -812,7 +812,7 @@ lzx_write_compressed_code(struct output_bitstream *out, const u8 prev_lens[restrict], unsigned num_syms) { - input_idx_t precode_freqs[LZX_PRECODE_NUM_SYMBOLS]; + u32 precode_freqs[LZX_PRECODE_NUM_SYMBOLS]; u8 output_syms[num_syms]; u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS]; u32 precode_codewords[LZX_PRECODE_NUM_SYMBOLS]; @@ -872,31 +872,27 @@ lzx_write_compressed_code(struct output_bitstream *out, * @block_type * The chosen type of the LZX compressed block (LZX_BLOCKTYPE_ALIGNED or * LZX_BLOCKTYPE_VERBATIM). - * @match_tab + * @items * The array of matches/literals to output. - * @match_count - * Number of matches/literals to output (length of @match_tab). + * @num_items + * Number of matches/literals to output (length of @items). * @codes * The main, length, and aligned offset Huffman codes for the current * LZX compressed block. */ static void -lzx_write_matches_and_literals(struct output_bitstream *ostream, - int block_type, - const struct lzx_item match_tab[], - unsigned match_count, - const struct lzx_codes *codes) +lzx_write_items(struct output_bitstream *ostream, int block_type, + const struct lzx_item items[], u32 num_items, + const struct lzx_codes *codes) { - for (unsigned i = 0; i < match_count; i++) { - struct lzx_item match = match_tab[i]; - + for (u32 i = 0; i < num_items; i++) { /* The high bit of the 32-bit intermediate representation * indicates whether the item is an actual LZ-style match (1) or * a literal byte (0). */ - if (match.data & 0x80000000) - lzx_write_match(ostream, block_type, match, codes); + if (items[i].data & 0x80000000) + lzx_write_match(ostream, block_type, items[i], codes); else - lzx_write_literal(ostream, match.data, codes); + lzx_write_literal(ostream, items[i].data, codes); } } @@ -1021,9 +1017,8 @@ lzx_write_compressed_block(int block_type, LZX_DEBUG("Writing matches and literals..."); /* Write the actual matches and literals. */ - lzx_write_matches_and_literals(ostream, block_type, - chosen_items, num_chosen_items, - codes); + lzx_write_items(ostream, block_type, + chosen_items, num_chosen_items, codes); LZX_DEBUG("Done writing block."); } @@ -1139,7 +1134,7 @@ lzx_tally_match(unsigned match_len, u32 match_offset, struct lzx_record_ctx { struct lzx_freqs freqs; struct lzx_lru_queue queue; - struct lzx_item *matches; + struct lzx_item *items; }; static void @@ -1147,7 +1142,7 @@ lzx_record_match(unsigned len, unsigned offset, void *_ctx) { struct lzx_record_ctx *ctx = _ctx; - (ctx->matches++)->data = lzx_tally_match(len, offset, &ctx->freqs, &ctx->queue); + (ctx->items++)->data = lzx_tally_match(len, offset, &ctx->freqs, &ctx->queue); } static void @@ -1155,7 +1150,7 @@ lzx_record_literal(u8 lit, void *_ctx) { struct lzx_record_ctx *ctx = _ctx; - (ctx->matches++)->data = lzx_tally_literal(lit, &ctx->freqs); + (ctx->items++)->data = lzx_tally_literal(lit, &ctx->freqs); } /* Returns the cost, in bits, to output a literal byte using the specified cost @@ -1248,10 +1243,10 @@ lzx_set_costs(struct lzx_compressor * ctx, const struct lzx_lens * lens) * value is the number of matches found. */ static unsigned lzx_get_matches(struct lzx_compressor *ctx, - const struct raw_match **matches_ret) + const struct lz_match **matches_ret) { - struct raw_match *cache_ptr; - struct raw_match *matches; + struct lz_match *cache_ptr; + struct lz_match *matches; unsigned num_matches; LZX_ASSERT(ctx->match_window_pos < ctx->match_window_end); @@ -1324,7 +1319,7 @@ lzx_get_matches(struct lzx_compressor *ctx, static void lzx_skip_bytes(struct lzx_compressor *ctx, unsigned n) { - struct raw_match *cache_ptr; + struct lz_match *cache_ptr; LZX_ASSERT(n <= ctx->match_window_end - ctx->match_window_pos); @@ -1349,7 +1344,7 @@ lzx_skip_bytes(struct lzx_compressor *ctx, unsigned n) * * Returns the first match in the list. */ -static struct raw_match +static struct lz_match lzx_match_chooser_reverse_list(struct lzx_compressor *ctx, unsigned cur_pos) { unsigned prev_link, saved_prev_link; @@ -1375,7 +1370,7 @@ lzx_match_chooser_reverse_list(struct lzx_compressor *ctx, unsigned cur_pos) ctx->optimum_cur_idx = ctx->optimum[0].next.link; - return (struct raw_match) + return (struct lz_match) { .len = ctx->optimum_cur_idx, .offset = ctx->optimum[0].next.match_offset, }; @@ -1443,12 +1438,12 @@ lzx_match_chooser_reverse_list(struct lzx_compressor *ctx, unsigned cur_pos) * The return value is a (length, offset) pair specifying the match or literal * chosen. For literals, the length is 0 or 1 and the offset is meaningless. */ -static struct raw_match +static struct lz_match lzx_get_near_optimal_match(struct lzx_compressor *ctx) { unsigned num_matches; - const struct raw_match *matches; - struct raw_match match; + const struct lz_match *matches; + struct lz_match match; unsigned longest_len; unsigned longest_rep_len; u32 longest_rep_offset; @@ -1493,7 +1488,7 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx) /* If there's a long match with a recent offset, take it. */ if (longest_rep_len >= ctx->params.alg_params.slow.nice_match_length) { lzx_skip_bytes(ctx, longest_rep_len); - return (struct raw_match) { + return (struct lz_match) { .len = longest_rep_len, .offset = longest_rep_offset, }; @@ -1860,7 +1855,7 @@ lzx_optimize_block(struct lzx_compressor *ctx, struct lzx_block_spec *spec, const u8 *window_ptr; const u8 *window_end; struct lzx_item *next_chosen_match; - struct raw_match raw_match; + struct lz_match lz_match; struct lzx_item lzx_item; LZX_ASSERT(num_passes >= 1); @@ -1885,15 +1880,15 @@ lzx_optimize_block(struct lzx_compressor *ctx, struct lzx_block_spec *spec, while (window_ptr != window_end) { - raw_match = lzx_get_near_optimal_match(ctx); + lz_match = lzx_get_near_optimal_match(ctx); - LZX_ASSERT(!(raw_match.len == LZX_MIN_MATCH_LEN && - raw_match.offset == ctx->max_window_size - + LZX_ASSERT(!(lz_match.len == LZX_MIN_MATCH_LEN && + lz_match.offset == ctx->max_window_size - LZX_MIN_MATCH_LEN)); - if (raw_match.len >= LZX_MIN_MATCH_LEN) { - lzx_tally_match(raw_match.len, raw_match.offset, + if (lz_match.len >= LZX_MIN_MATCH_LEN) { + lzx_tally_match(lz_match.len, lz_match.offset, &freqs, &ctx->queue); - window_ptr += raw_match.len; + window_ptr += lz_match.len; } else { lzx_tally_literal(*window_ptr, &freqs); window_ptr += 1; @@ -1915,16 +1910,16 @@ lzx_optimize_block(struct lzx_compressor *ctx, struct lzx_block_spec *spec, next_chosen_match = spec->chosen_items; while (window_ptr != window_end) { - raw_match = lzx_get_near_optimal_match(ctx); + lz_match = lzx_get_near_optimal_match(ctx); - LZX_ASSERT(!(raw_match.len == LZX_MIN_MATCH_LEN && - raw_match.offset == ctx->max_window_size - + LZX_ASSERT(!(lz_match.len == LZX_MIN_MATCH_LEN && + lz_match.offset == ctx->max_window_size - LZX_MIN_MATCH_LEN)); - if (raw_match.len >= LZX_MIN_MATCH_LEN) { - lzx_item.data = lzx_tally_match(raw_match.len, - raw_match.offset, + if (lz_match.len >= LZX_MIN_MATCH_LEN) { + lzx_item.data = lzx_tally_match(lz_match.len, + lz_match.offset, &freqs, &ctx->queue); - window_ptr += raw_match.len; + window_ptr += lz_match.len; } else { lzx_item.data = lzx_tally_literal(*window_ptr, &freqs); window_ptr += 1; @@ -2009,7 +2004,7 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx) /* Initialize symbol frequencies and match offset LRU queue. */ memset(&record_ctx.freqs, 0, sizeof(struct lzx_freqs)); lzx_lru_queue_init(&record_ctx.queue); - record_ctx.matches = ctx->chosen_items; + record_ctx.items = ctx->chosen_items; /* Determine series of matches/literals to output. */ lz_analyze_block(ctx->window, @@ -2025,7 +2020,7 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx) spec->block_type = LZX_BLOCKTYPE_ALIGNED; spec->window_pos = 0; spec->block_size = ctx->window_size; - spec->num_chosen_items = (record_ctx.matches - ctx->chosen_items); + spec->num_chosen_items = record_ctx.items - ctx->chosen_items; spec->chosen_items = ctx->chosen_items; lzx_make_huffman_codes(&record_ctx.freqs, &spec->codes, ctx->num_main_syms); @@ -2086,7 +2081,7 @@ lzx_compress(const void *uncompressed_data, size_t uncompressed_size, LZX_DEBUG("Flushing bitstream..."); compressed_size = flush_output_bitstream(&ostream); - if (compressed_size == ~(input_idx_t)0) { + if (compressed_size == (u32)~0UL) { LZX_DEBUG("Data did not compress to %zu bytes or less!", compressed_size_avail); return 0; @@ -2220,8 +2215,6 @@ lzx_create_compressor(size_t window_size, if (!lzx_window_size_valid(window_size)) return WIMLIB_ERR_INVALID_PARAM; - LZX_DEBUG("Allocating memory."); - ctx = CALLOC(1, sizeof(struct lzx_compressor)); if (ctx == NULL) goto oom; @@ -2262,7 +2255,7 @@ lzx_create_compressor(size_t window_size, min(params->alg_params.slow.nice_match_length, LZX_MAX_MATCH_LEN)) * sizeof(ctx->optimum[0])); - if (!ctx->optimum) + if (ctx->optimum == NULL) goto oom; }