X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flzms-compress.c;h=7c103f7b42150695b366ef69170b161a9160b432;hp=0e9f0c3657d996bdaf9fe2dc669abc699e1e7961;hb=dd38de3e27916c2d7fe97158c3df38c6b9b43e0d;hpb=b7071062542143113ad654d89ee6b0603b23b524 diff --git a/src/lzms-compress.c b/src/lzms-compress.c index 0e9f0c36..7c103f7b 100644 --- a/src/lzms-compress.c +++ b/src/lzms-compress.c @@ -116,7 +116,7 @@ struct lzms_range_encoder { * lzms_range_encoder_raw. */ struct lzms_range_encoder_raw *rc; - /* Bits recently encoded by this range encoder. This are used as in + /* Bits recently encoded by this range encoder. This is used as an * index into @prob_entries. */ u32 state; @@ -669,17 +669,9 @@ lzms_do_init_rc_costs(void) static void lzms_init_rc_costs(void) { - static bool done = false; - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - - if (unlikely(!done)) { - pthread_mutex_lock(&mutex); - if (!done) { - lzms_do_init_rc_costs(); - done = true; - } - pthread_mutex_unlock(&mutex); - } + static pthread_once_t once = PTHREAD_ONCE_INIT; + + pthread_once(&once, lzms_do_init_rc_costs); } /* @@ -734,7 +726,7 @@ lzms_offset_cost(const struct lzms_huffman_encoder *enc, u32 offset) } static u32 -lzms_length_cost(const struct lzms_huffman_encoder *enc, u32 length) +lzms_get_length_cost(const struct lzms_huffman_encoder *enc, u32 length) { u32 slot; u32 num_extra_bits; @@ -782,9 +774,8 @@ lzms_get_literal_cost(struct lzms_compressor *ctx, } static u32 -lzms_get_lz_match_cost(struct lzms_compressor *ctx, - struct lzms_adaptive_state *state, - u32 length, u32 offset) +lzms_get_lz_match_cost_nolen(struct lzms_compressor *ctx, + struct lzms_adaptive_state *state, u32 offset) { u32 cost = 0; int recent_offset_idx; @@ -827,7 +818,6 @@ lzms_get_lz_match_cost(struct lzms_compressor *ctx, state->lru.recent_offsets[i] = state->lru.recent_offsets[i + 1]; } - cost += lzms_length_cost(&ctx->length_encoder, length); state->lru.upcoming_offset = offset; lzms_update_lz_lru_queues(&state->lru); @@ -835,6 +825,15 @@ lzms_get_lz_match_cost(struct lzms_compressor *ctx, return cost; } +static u32 +lzms_get_lz_match_cost(struct lzms_compressor *ctx, + struct lzms_adaptive_state *state, + u32 length, u32 offset) +{ + return lzms_get_lz_match_cost_nolen(ctx, state, offset) + + lzms_get_length_cost(&ctx->length_encoder, length); +} + static struct raw_match lzms_match_chooser_reverse_list(struct lzms_compressor *ctx, unsigned cur_pos) { @@ -878,7 +877,6 @@ lzms_get_near_optimal_match(struct lzms_compressor *ctx) u32 longest_len; u32 longest_rep_len; u32 longest_rep_offset; - struct raw_match *matchptr; unsigned cur_pos; unsigned end_pos; struct lzms_adaptive_state initial_state; @@ -896,7 +894,7 @@ lzms_get_near_optimal_match(struct lzms_compressor *ctx) ctx->optimum_end_idx = 0; longest_rep_len = ctx->params.min_match_length - 1; - if (lz_bt_get_position(&ctx->mf) >= 1) { + if (lz_bt_get_position(&ctx->mf) >= LZMS_MAX_INIT_RECENT_OFFSET) { u32 limit = min(ctx->params.max_match_length, lz_bt_get_remaining_size(&ctx->mf)); for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++) { @@ -946,18 +944,26 @@ lzms_get_near_optimal_match(struct lzms_compressor *ctx) *(lz_bt_get_window_ptr(&ctx->mf) - 1)); ctx->optimum[1].prev.link = 0; - matchptr = matches; - for (u32 len = 2; len <= longest_len; len++) { - u32 offset = matchptr->offset; - - ctx->optimum[len].state = initial_state; - ctx->optimum[len].prev.link = 0; - ctx->optimum[len].prev.match_offset = offset; - ctx->optimum[len].cost = lzms_get_lz_match_cost(ctx, - &ctx->optimum[len].state, - len, offset); - if (len == matchptr->len) - matchptr++; + for (u32 i = 0, len = 2; i < num_matches; i++) { + u32 offset = matches[i].offset; + struct lzms_adaptive_state state; + u32 position_cost; + + state = initial_state; + position_cost = 0; + position_cost += lzms_get_lz_match_cost_nolen(ctx, &state, offset); + + do { + u32 cost; + + cost = position_cost; + cost += lzms_get_length_cost(&ctx->length_encoder, len); + + ctx->optimum[len].state = state; + ctx->optimum[len].prev.link = 0; + ctx->optimum[len].prev.match_offset = offset; + ctx->optimum[len].cost = cost; + } while (++len <= matches[i].len); } end_pos = longest_len; @@ -992,18 +998,20 @@ lzms_get_near_optimal_match(struct lzms_compressor *ctx) return lzms_match_chooser_reverse_list(ctx, cur_pos); longest_rep_len = ctx->params.min_match_length - 1; - u32 limit = min(ctx->params.max_match_length, - lz_bt_get_remaining_size(&ctx->mf)); - for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++) { - u32 offset = ctx->optimum[cur_pos].state.lru.recent_offsets[i]; - const u8 *strptr = lz_bt_get_window_ptr(&ctx->mf); - const u8 *matchptr = strptr - offset; - u32 len = 0; - while (len < limit && strptr[len] == matchptr[len]) - len++; - if (len > longest_rep_len) { - longest_rep_len = len; - longest_rep_offset = offset; + if (lz_bt_get_position(&ctx->mf) >= LZMS_MAX_INIT_RECENT_OFFSET) { + u32 limit = min(ctx->params.max_match_length, + lz_bt_get_remaining_size(&ctx->mf)); + for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++) { + u32 offset = ctx->optimum[cur_pos].state.lru.recent_offsets[i]; + const u8 *strptr = lz_bt_get_window_ptr(&ctx->mf); + const u8 *matchptr = strptr - offset; + u32 len = 0; + while (len < limit && strptr[len] == matchptr[len]) + len++; + if (len > longest_rep_len) { + longest_rep_len = len; + longest_rep_offset = offset; + } } } @@ -1053,23 +1061,28 @@ lzms_get_near_optimal_match(struct lzms_compressor *ctx) ctx->optimum[cur_pos + 1].prev.link = cur_pos; } - matchptr = matches; - for (u32 len = 2; len <= longest_len; len++) { - u32 offset; + for (u32 i = 0, len = 2; i < num_matches; i++) { + u32 offset = matches[i].offset; + struct lzms_adaptive_state state; + u32 position_cost; - offset = matchptr->offset; state = ctx->optimum[cur_pos].state; + position_cost = ctx->optimum[cur_pos].cost; + position_cost += lzms_get_lz_match_cost_nolen(ctx, &state, offset); - cost = ctx->optimum[cur_pos].cost + - lzms_get_lz_match_cost(ctx, &state, len, offset); - if (cost < ctx->optimum[cur_pos + len].cost) { - ctx->optimum[cur_pos + len].state = state; - ctx->optimum[cur_pos + len].prev.link = cur_pos; - ctx->optimum[cur_pos + len].prev.match_offset = offset; - ctx->optimum[cur_pos + len].cost = cost; - } - if (len == matchptr->len) - matchptr++; + do { + u32 cost; + + cost = position_cost; + cost += lzms_get_length_cost(&ctx->length_encoder, len); + + if (cost < ctx->optimum[cur_pos + len].cost) { + ctx->optimum[cur_pos + len].state = state; + ctx->optimum[cur_pos + len].prev.link = cur_pos; + ctx->optimum[cur_pos + len].prev.match_offset = offset; + ctx->optimum[cur_pos + len].cost = cost; + } + } while (++len <= matches[i].len); } if (longest_rep_len >= ctx->params.min_match_length) {