X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzms-compress.c;h=2c9356d9b1a538006a7fc9dcae90404bc2c271a2;hp=a18da5781842b12f892b51ef94eebbe9e7f433f1;hb=ee4fcdd5c4924803ae67a09fecac7d6b4b8ead6e;hpb=1dbfb8e36f81ced8474ce048c2d0e18ac07de60d diff --git a/src/lzms-compress.c b/src/lzms-compress.c index a18da578..2c9356d9 100644 --- a/src/lzms-compress.c +++ b/src/lzms-compress.c @@ -24,8 +24,7 @@ /* This a compressor for the LZMS compression format. More details about this * format can be found in lzms-decompress.c. * - * This is currently an unsophisticated implementation that is fast but does not - * attain the best compression ratios allowed by the format. + * NOTE: this compressor currently does not code any delta matches. */ #ifdef HAVE_CONFIG_H @@ -33,12 +32,12 @@ #endif #include "wimlib.h" +#include "wimlib/assert.h" #include "wimlib/compiler.h" #include "wimlib/compressor_ops.h" #include "wimlib/compress_common.h" #include "wimlib/endianness.h" #include "wimlib/error.h" -#include "wimlib/lz_hash.h" #include "wimlib/lz_sarray.h" #include "wimlib/lzms.h" #include "wimlib/util.h" @@ -47,8 +46,6 @@ #include #include -#define LZMS_OPTIM_ARRAY_SIZE 1024 - struct lzms_compressor; struct lzms_adaptive_state { struct lzms_lz_lru_queues lru; @@ -127,7 +124,7 @@ struct lzms_range_encoder { * lzms_range_encoder_raw. */ struct lzms_range_encoder_raw *rc; - /* Bits recently encoded by this range encoder. This are used as in + /* Bits recently encoded by this range encoder. This is used as an * index into @prob_entries. */ u32 state; @@ -166,7 +163,7 @@ struct lzms_huffman_encoder { u8 lens[LZMS_MAX_NUM_SYMS]; /* The codeword of each symbol in the Huffman code. */ - u16 codewords[LZMS_MAX_NUM_SYMS]; + u32 codewords[LZMS_MAX_NUM_SYMS]; }; /* State of the LZMS compressor. */ @@ -180,12 +177,6 @@ struct lzms_compressor { /* Size of the data in @buffer. 
*/ u32 window_size; -#if 0 - /* Temporary array used by lz_analyze_block(); must be at least as long - * as the window. */ - u32 *prev_tab; -#endif - /* Suffix array match-finder. */ struct lz_sarray lz_sarray; @@ -543,7 +534,7 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset) /* Main bit: 1 = a match, not a literal. */ lzms_range_encode_bit(&ctx->main_range_encoder, 1); - /* Match bit: 0 = a LZ match, not a delta match. */ + /* Match bit: 0 = an LZ match, not a delta match. */ lzms_range_encode_bit(&ctx->match_range_encoder, 0); /* Determine if the offset can be represented as a recent offset. */ @@ -594,48 +585,6 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset) lzms_end_encode_item(ctx, length); } -#if 0 -static void -lzms_record_literal(u8 literal, void *_ctx) -{ - struct lzms_compressor *ctx = _ctx; - - lzms_encode_literal(ctx, literal); -} - -static void -lzms_record_match(unsigned length, unsigned offset, void *_ctx) -{ - struct lzms_compressor *ctx = _ctx; - - lzms_encode_lz_match(ctx, length, offset); -} - -static void -lzms_fast_encode(struct lzms_compressor *ctx) -{ - static const struct lz_params lzms_lz_params = { - .min_match = 3, - .max_match = UINT_MAX, - .max_offset = UINT_MAX, - .nice_match = 64, - .good_match = 32, - .max_chain_len = 64, - .max_lazy_match = 258, - .too_far = 4096, - }; - - lz_analyze_block(ctx->window, - ctx->window_size, - lzms_record_match, - lzms_record_literal, - ctx, - &lzms_lz_params, - ctx->prev_tab); - -} -#endif - /* Fast heuristic cost evaluation to use in the inner loop of the match-finder. * Unlike lzms_get_lz_match_cost(), which does a true cost evaluation, this * simply prioritize matches based on their offset. 
*/ @@ -717,17 +666,9 @@ lzms_do_init_rc_costs(void) static void lzms_init_rc_costs(void) { - static bool done = false; - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - - if (unlikely(!done)) { - pthread_mutex_lock(&mutex); - if (!done) { - lzms_do_init_rc_costs(); - done = true; - } - pthread_mutex_unlock(&mutex); - } + static pthread_once_t once = PTHREAD_ONCE_INIT; + + pthread_once(&once, lzms_do_init_rc_costs); } /* @@ -927,12 +868,13 @@ lzms_get_near_optimal_match(struct lzms_compressor *ctx) * - The costs of literals and matches are estimated using the range encoder * states and the semi-adaptive Huffman codes. Except for range encoding * states, costs are assumed to be constant throughout a single run of the - * parsing algorithm, which can parse up to LZMS_OPTIM_ARRAY_SIZE bytes of - * data. This introduces a source of inaccuracy because the probabilities and - * Huffman codes can change over this part of the data. + * parsing algorithm, which can parse up to @optim_array_length (from the + * `struct wimlib_lzms_compressor_params') bytes of data. This introduces a + * source of inaccuracy because the probabilities and Huffman codes can change + * over this part of the data. */ static void -lzms_normal_encode(struct lzms_compressor *ctx) +lzms_encode(struct lzms_compressor *ctx) { struct raw_match match; @@ -1146,11 +1088,7 @@ lzms_compress(const void *uncompressed_data, size_t uncompressed_size, /* Compute and encode a literal/match sequence that decompresses to the * preprocessed data. */ -#if 1 - lzms_normal_encode(ctx); -#else - lzms_fast_encode(ctx); -#endif + lzms_encode(ctx); /* Get and return the compressed data size. 
*/ compressed_size = lzms_finalize(ctx, compressed_data, @@ -1215,9 +1153,6 @@ lzms_free_compressor(void *_ctx) if (ctx) { FREE(ctx->window); -#if 0 - FREE(ctx->prev_tab); -#endif FREE(ctx->matches); lz_sarray_destroy(&ctx->lz_sarray); lz_match_chooser_destroy(&ctx->mc); @@ -1226,7 +1161,9 @@ lzms_free_compressor(void *_ctx) } static const struct wimlib_lzms_compressor_params lzms_default = { - .hdr = sizeof(struct wimlib_lzms_compressor_params), + .hdr = { + .size = sizeof(struct wimlib_lzms_compressor_params), + }, .min_match_length = 2, .max_match_length = UINT32_MAX, .nice_match_length = 32, @@ -1235,6 +1172,9 @@ static const struct wimlib_lzms_compressor_params lzms_default = { .optim_array_length = 1024, }; +static bool +lzms_params_valid(const struct wimlib_compressor_params_header *); + static const struct wimlib_lzms_compressor_params * lzms_get_params(const struct wimlib_compressor_params_header *_params) { @@ -1244,6 +1184,8 @@ lzms_get_params(const struct wimlib_compressor_params_header *_params) if (params == NULL) params = &lzms_default; + LZMS_ASSERT(lzms_params_valid(&params->hdr)); + return params; } @@ -1268,12 +1210,6 @@ lzms_create_compressor(size_t max_block_size, if (ctx->window == NULL) goto oom; -#if 0 - ctx->prev_tab = MALLOC(max_block_size * sizeof(ctx->prev_tab[0])); - if (ctx->prev_tab == NULL) - goto oom; -#endif - ctx->matches = MALLOC(min(params->max_match_length - params->min_match_length + 1, params->max_matches_per_pos) * @@ -1283,7 +1219,7 @@ lzms_create_compressor(size_t max_block_size, if (!lz_sarray_init(&ctx->lz_sarray, max_block_size, params->min_match_length, - params->max_match_length, + min(params->max_match_length, LZ_SARRAY_LEN_MAX), params->max_search_depth, params->max_matches_per_pos)) goto oom;