From abe65b1f2f12bcedec1103cc7924897720f919be Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 1 Jan 2014 22:54:03 -0600 Subject: [PATCH] LZMS: Accelerate slot-finding --- include/wimlib/lzms.h | 46 ++++++++++--- src/lzms-common.c | 130 ++++++++++++++++++++++++----------- src/lzms-compress.c | 154 ++++++++++++++++++++---------------------- src/lzms-decompress.c | 40 +++++++---- 4 files changed, 227 insertions(+), 143 deletions(-) diff --git a/include/wimlib/lzms.h b/include/wimlib/lzms.h index ff3888eb..798b339c 100644 --- a/include/wimlib/lzms.h +++ b/include/wimlib/lzms.h @@ -8,6 +8,8 @@ #ifdef ENABLE_LZMS_DEBUG # define LZMS_DEBUG DEBUG # define LZMS_ASSERT wimlib_assert +# include "wimlib/assert.h" +# include "wimlib/error.h" #else # define LZMS_DEBUG(format, ...) # define LZMS_ASSERT(...) @@ -47,7 +49,7 @@ /* Code shared between the LZMS decompressor and compressor. */ -#include +#include extern void lzms_x86_filter(u8 data[], s32 size, s32 last_target_usages[], bool undo); @@ -103,26 +105,52 @@ struct lzms_lru_queues { extern u32 lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1]; +extern u8 lzms_extra_position_bits[LZMS_MAX_NUM_OFFSET_SYMS]; + +extern u16 lzms_order_to_position_slot_bounds[30][2]; + extern u32 lzms_length_slot_base[LZMS_NUM_LEN_SYMS + 1]; +#define LZMS_NUM_FAST_LENGTHS 1024 +extern u8 lzms_length_slot_fast[LZMS_NUM_FAST_LENGTHS]; + +extern u8 lzms_extra_length_bits[LZMS_NUM_LEN_SYMS]; + extern void -lzms_init_slot_bases(void); +lzms_init_slots(void); +/* Return the slot for the specified value. */ extern u32 -lzms_get_slot(u32 value, const u32 slot_base_tab[], unsigned num_slots); +lzms_get_slot(u32 value, const u32 slot_base_tab[], u32 num_slots); static inline u32 -lzms_get_position_slot(u32 value) +lzms_get_position_slot(u32 position) { - return lzms_get_slot(value, lzms_position_slot_base, - LZMS_MAX_NUM_OFFSET_SYMS); + u32 order = bsr32(position); + u32 l = lzms_order_to_position_slot_bounds[order][0]; + u32 r = lzms_order_to_position_slot_bounds[order][1]; + + for (;;) { + u32 slot = (l + r) / 2; + if (position >= lzms_position_slot_base[slot]) { + if (position < lzms_position_slot_base[slot + 1]) + return slot; + else + l = slot + 1; + } else { + r = slot - 1; + } + } } static inline u32 -lzms_get_length_slot(u32 value) +lzms_get_length_slot(u32 length) { - return lzms_get_slot(value, lzms_length_slot_base, - LZMS_NUM_LEN_SYMS); + if (likely(length < LZMS_NUM_FAST_LENGTHS)) + return lzms_length_slot_fast[length]; + else + return lzms_get_slot(length, lzms_length_slot_base, + LZMS_NUM_LEN_SYMS); } extern void diff --git a/src/lzms-common.c b/src/lzms-common.c index 3609e5a5..92f28c2d 100644 --- a/src/lzms-common.c +++ b/src/lzms-common.c @@ -29,54 +29,85 @@ #endif #include "wimlib/endianness.h" -#include "wimlib/error.h" #include "wimlib/lzms.h" #include "wimlib/util.h" #include -/* A table that maps position slots to their base values. These are constants - * computed at runtime by lzms_compute_slot_bases(). */ +/*************************************************************** + * Constant tables initialized by lzms_compute_slots(): * + ***************************************************************/ + +/* Table: position slot => position slot base value */ u32 lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1]; -/* A table that maps length slots to their base values. These are constants - * computed at runtime by lzms_compute_slot_bases(). */ +/* Table: position slot => number of extra position bits */ +u8 lzms_extra_position_bits[LZMS_MAX_NUM_OFFSET_SYMS]; + +/* Table: log2(position) => [lower bound, upper bound] on position slot */ +u16 lzms_order_to_position_slot_bounds[30][2]; + +/* Table: length slot => length slot base value */ u32 lzms_length_slot_base[LZMS_NUM_LEN_SYMS + 1]; -/* Return the slot for the specified value. */ -unsigned -lzms_get_slot(u32 value, const u32 slot_base_tab[], unsigned num_slots) -{ - /* TODO: Speed this up. */ - unsigned slot = 0; +/* Table: length slot => number of extra length bits */ +u8 lzms_extra_length_bits[LZMS_NUM_LEN_SYMS]; - while (slot_base_tab[slot + 1] <= value) - slot++; +/* Table: length (< LZMS_NUM_FAST_LENGTHS only) => length slot */ +u8 lzms_length_slot_fast[LZMS_NUM_FAST_LENGTHS]; - return slot; +u32 +lzms_get_slot(u32 value, const u32 slot_base_tab[], unsigned num_slots) +{ + u32 l = 0; + u32 r = num_slots - 1; + for (;;) { + LZMS_ASSERT(r >= l); + u32 slot = (l + r) / 2; + if (value >= slot_base_tab[slot]) { + if (value < slot_base_tab[slot + 1]) + return slot; + else + l = slot + 1; + } else { + r = slot - 1; + } + } } - static void lzms_decode_delta_rle_slot_bases(u32 slot_bases[], - const u8 delta_run_lens[], size_t num_run_lens) + u8 extra_bits[], + const u8 delta_run_lens[], + u32 num_run_lens, + u32 final, + u32 expected_num_slots) { + u32 order = 0; u32 delta = 1; u32 base = 0; - size_t slot = 0; - for (size_t i = 0; i < num_run_lens; i++) { + u32 slot = 0; + for (u32 i = 0; i < num_run_lens; i++) { u8 run_len = delta_run_lens[i]; while (run_len--) { base += delta; - slot_bases[slot++] = base; + if (slot > 0) + extra_bits[slot - 1] = order; + slot_bases[slot] = base; + slot++; } delta <<= 1; + order++; } + LZMS_ASSERT(slot == expected_num_slots); + + slot_bases[slot] = final; + extra_bits[slot - 1] = bsr32(slot_bases[slot] - slot_bases[slot - 1]); } /* Initialize the global position and length slot tables. */ static void -lzms_compute_slot_bases(void) +lzms_compute_slots(void) { /* If an explicit formula that maps LZMS position and length slots to * slot bases exists, then it could be used here. But until one is @@ -96,31 +127,52 @@ lzms_compute_slot_bases(void) 1, }; + /* Position slots */ lzms_decode_delta_rle_slot_bases(lzms_position_slot_base, + lzms_extra_position_bits, position_slot_delta_run_lens, - ARRAY_LEN(position_slot_delta_run_lens)); - - lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS] = 0x7fffffff; + ARRAY_LEN(position_slot_delta_run_lens), + 0x7fffffff, + LZMS_MAX_NUM_OFFSET_SYMS); + + for (u32 order = 0; order < 30; order++) { + lzms_order_to_position_slot_bounds[order][0] = + lzms_get_slot(1U << order, lzms_position_slot_base, + LZMS_MAX_NUM_OFFSET_SYMS); + lzms_order_to_position_slot_bounds[order][1] = + lzms_get_slot((1U << (order + 1)) - 1, lzms_position_slot_base, + LZMS_MAX_NUM_OFFSET_SYMS); + } + /* Length slots */ lzms_decode_delta_rle_slot_bases(lzms_length_slot_base, + lzms_extra_length_bits, length_slot_delta_run_lens, - ARRAY_LEN(length_slot_delta_run_lens)); - - lzms_length_slot_base[LZMS_NUM_LEN_SYMS] = 0x400108ab; + ARRAY_LEN(length_slot_delta_run_lens), + 0x400108ab, + LZMS_NUM_LEN_SYMS); + + /* Create table mapping short lengths to length slots. */ + for (u32 slot = 0, i = 0; i < LZMS_NUM_FAST_LENGTHS; i++) { + if (i >= lzms_length_slot_base[slot + 1]) + slot++; + lzms_length_slot_fast[i] = slot; + } } -/* Initialize the global position length slot tables if not done so already. */ +/* Initialize the global position and length slot tables if not done so already. + * */ void -lzms_init_slot_bases(void) +lzms_init_slots(void) { + static bool done = false; static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - static bool already_computed = false; - if (unlikely(!already_computed)) { + if (unlikely(!done)) { pthread_mutex_lock(&mutex); - if (!already_computed) { - lzms_compute_slot_bases(); - already_computed = true; + if (!done) { + lzms_compute_slots(); + done = true; } pthread_mutex_unlock(&mutex); } @@ -261,7 +313,7 @@ lzms_x86_filter(u8 data[restrict], static void lzms_init_lz_lru_queues(struct lzms_lz_lru_queues *lz) { - /* Recent offsets for LZ matches */ + /* Recent offsets for LZ matches */ for (u32 i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) lz->recent_offsets[i] = i + 1; @@ -272,7 +324,7 @@ lzms_init_lz_lru_queues(struct lzms_lz_lru_queues *lz) static void lzms_init_delta_lru_queues(struct lzms_delta_lru_queues *delta) { - /* Recent offsets and powers for LZ matches */ + /* Recent offsets and powers for LZ matches */ for (u32 i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) { delta->recent_offsets[i] = i + 1; delta->recent_powers[i] = 0; @@ -287,8 +339,8 @@ lzms_init_delta_lru_queues(struct lzms_delta_lru_queues *delta) void lzms_init_lru_queues(struct lzms_lru_queues *lru) { - lzms_init_lz_lru_queues(&lru->lz); - lzms_init_delta_lru_queues(&lru->delta); + lzms_init_lz_lru_queues(&lru->lz); + lzms_init_delta_lru_queues(&lru->delta); } void @@ -321,6 +373,6 @@ lzms_update_delta_lru_queues(struct lzms_delta_lru_queues *delta) void lzms_update_lru_queues(struct lzms_lru_queues *lru) { - lzms_update_lz_lru_queues(&lru->lz); - lzms_update_delta_lru_queues(&lru->delta); + lzms_update_lz_lru_queues(&lru->lz); + lzms_update_delta_lru_queues(&lru->delta); } diff --git a/src/lzms-compress.c b/src/lzms-compress.c index 6df10806..31648f0a 100644 --- a/src/lzms-compress.c +++ b/src/lzms-compress.c @@ -33,7 +33,6 @@ #endif #include "wimlib.h" -#include "wimlib/assert.h" #include "wimlib/compiler.h" #include "wimlib/compressor_ops.h" #include "wimlib/compress_common.h" @@ -140,9 +139,7 @@ struct lzms_range_encoder { struct lzms_probability_entry prob_entries[LZMS_MAX_NUM_STATES]; }; -/* Structure used for Huffman encoding, optionally encoding larger "values" as a - * Huffman symbol specifying a slot and a slot-dependent number of extra bits. - * */ +/* Structure used for Huffman encoding. */ struct lzms_huffman_encoder { /* Bitstream to write Huffman-encoded symbols and verbatim bits to. @@ -150,9 +147,6 @@ struct lzms_huffman_encoder { */ struct lzms_output_bitstream *os; - /* Pointer to the slot base table to use. */ - const u32 *slot_base_tab; - /* Number of symbols that have been written using this code far. Reset * to 0 whenever the code is rebuilt. */ u32 num_syms_written; @@ -195,7 +189,8 @@ struct lzms_compressor { /* Suffix array match-finder. */ struct lz_sarray lz_sarray; - struct raw_match matches[64]; + /* Temporary space to store found matches. */ + struct raw_match *matches; /* Match-chooser. */ struct lz_match_chooser mc; @@ -460,29 +455,36 @@ lzms_huffman_encode_symbol(struct lzms_huffman_encoder *enc, u32 sym) } } -/* Encode a number as a Huffman symbol specifying a slot, plus a number of - * slot-dependent extra bits. */ static void -lzms_encode_value(struct lzms_huffman_encoder *enc, u32 value) +lzms_encode_length(struct lzms_huffman_encoder *enc, u32 length) { unsigned slot; unsigned num_extra_bits; u32 extra_bits; - LZMS_ASSERT(enc->slot_base_tab != NULL); + slot = lzms_get_length_slot(length); - slot = lzms_get_slot(value, enc->slot_base_tab, enc->num_syms); + num_extra_bits = lzms_extra_length_bits[slot]; - /* Get the number of extra bits needed to represent the range of values - * that share the slot. */ - num_extra_bits = bsr32(enc->slot_base_tab[slot + 1] - - enc->slot_base_tab[slot]); + extra_bits = length - lzms_length_slot_base[slot]; - /* Calculate the extra bits as the offset from the slot base. */ - extra_bits = value - enc->slot_base_tab[slot]; + lzms_huffman_encode_symbol(enc, slot); + lzms_output_bitstream_put_bits(enc->os, extra_bits, num_extra_bits); +} + +static void +lzms_encode_offset(struct lzms_huffman_encoder *enc, u32 offset) +{ + unsigned slot; + unsigned num_extra_bits; + u32 extra_bits; + + slot = lzms_get_position_slot(offset); + + num_extra_bits = lzms_extra_position_bits[slot]; + + extra_bits = offset - lzms_position_slot_base[slot]; - /* Output the slot (Huffman-encoded), then the extra bits (verbatim). - */ lzms_huffman_encode_symbol(enc, slot); lzms_output_bitstream_put_bits(enc->os, extra_bits, num_extra_bits); } @@ -558,7 +560,7 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset) lzms_range_encode_bit(&ctx->lz_match_range_encoder, 0); /* Encode the match offset. */ - lzms_encode_value(&ctx->lz_offset_encoder, offset); + lzms_encode_offset(&ctx->lz_offset_encoder, offset); } else { int i; @@ -583,7 +585,7 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset) } /* Encode the match length. */ - lzms_encode_value(&ctx->length_encoder, length); + lzms_encode_length(&ctx->length_encoder, length); /* Save the match offset for later insertion at the front of the LZ * match offset LRU queue. */ @@ -654,7 +656,7 @@ lzms_lz_match_cost_fast(input_idx_t length, input_idx_t offset, const void *_lru /*#define LZMS_RC_COSTS_USE_FLOATING_POINT*/ static u32 -lzms_rc_costs[LZMS_PROBABILITY_MAX]; +lzms_rc_costs[LZMS_PROBABILITY_MAX + 1]; #ifdef LZMS_RC_COSTS_USE_FLOATING_POINT # include @@ -684,7 +686,7 @@ lzms_do_init_rc_costs(void) * really interpreted as 1 / 64 and 64 / 64 is really interpreted as * 63 / 64. */ - for (u32 i = 0; i < LZMS_PROBABILITY_MAX; i++) { + for (u32 i = 0; i <= LZMS_PROBABILITY_MAX; i++) { u32 prob = i; if (prob == 0) @@ -718,7 +720,7 @@ lzms_init_rc_costs(void) static bool done = false; static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - if (!done) { + if (unlikely(!done)) { pthread_mutex_lock(&mutex); if (!done) { lzms_do_init_rc_costs(); @@ -747,11 +749,6 @@ lzms_rc_bit_cost(const struct lzms_range_encoder *enc, u8 *cur_state, int bit) *cur_state = (*cur_state << 1) | bit; - if (prob_zero == 0) - prob_zero = 1; - else if (prob_zero == LZMS_PROBABILITY_MAX) - prob_zero = LZMS_PROBABILITY_MAX - 1; - if (bit == 0) prob_correct = prob_zero; else @@ -766,20 +763,18 @@ lzms_huffman_symbol_cost(const struct lzms_huffman_encoder *enc, u32 sym) return enc->lens[sym] << LZMS_COST_SHIFT; } -/* Compute the cost to encode a number with lzms_encode_value(). */ static u32 -lzms_value_cost(const struct lzms_huffman_encoder *enc, u32 value) +lzms_offset_cost(const struct lzms_huffman_encoder *enc, u32 offset) { u32 slot; u32 num_extra_bits; u32 cost = 0; - slot = lzms_get_slot(value, enc->slot_base_tab, enc->num_syms); + slot = lzms_get_position_slot(offset); cost += lzms_huffman_symbol_cost(enc, slot); - num_extra_bits = bsr32(enc->slot_base_tab[slot + 1] - - enc->slot_base_tab[slot]); + num_extra_bits = lzms_extra_position_bits[slot]; cost += num_extra_bits << LZMS_COST_SHIFT; @@ -787,41 +782,33 @@ lzms_value_cost(const struct lzms_huffman_encoder *enc, u32 value) } static u32 -lzms_get_matches(struct lzms_compressor *ctx, - const struct lzms_adaptive_state *state, - struct raw_match **matches_ret) +lzms_length_cost(const struct lzms_huffman_encoder *enc, u32 length) { - u32 num_matches; - struct raw_match *matches = ctx->matches; + u32 slot; + u32 num_extra_bits; + u32 cost = 0; - num_matches = lz_sarray_get_matches(&ctx->lz_sarray, - matches, - lzms_lz_match_cost_fast, - &state->lru); -#if 0 - fprintf(stderr, "Pos %u: %u matches\n", - lz_sarray_get_pos(&ctx->lz_sarray) - 1, num_matches); - for (u32 i = 0; i < num_matches; i++) - fprintf(stderr, "\tLen %u Offset %u\n", matches[i].len, matches[i].offset); -#endif + slot = lzms_get_length_slot(length); -#ifdef ENABLE_LZMS_DEBUG - LZMS_ASSERT(lz_sarray_get_pos(&ctx->lz_sarray) > 0); - u32 curpos = lz_sarray_get_pos(&ctx->lz_sarray) - 1; - for (u32 i = 0; i < num_matches; i++) { - LZMS_ASSERT(matches[i].len <= ctx->window_size - curpos); - LZMS_ASSERT(matches[i].offset > 0); - LZMS_ASSERT(matches[i].offset <= curpos); - LZMS_ASSERT(!memcmp(&ctx->window[curpos], - &ctx->window[curpos - matches[i].offset], - matches[i].len)); - if (i < num_matches - 1) - LZMS_ASSERT(matches[i].len > matches[i + 1].len); + cost += lzms_huffman_symbol_cost(enc, slot); - } -#endif - *matches_ret = matches; - return num_matches; + num_extra_bits = lzms_extra_length_bits[slot]; + + cost += num_extra_bits << LZMS_COST_SHIFT; + + return cost; +} + +static u32 +lzms_get_matches(struct lzms_compressor *ctx, + const struct lzms_adaptive_state *state, + struct raw_match **matches_ret) +{ + *matches_ret = ctx->matches; + return lz_sarray_get_matches(&ctx->lz_sarray, + ctx->matches, + lzms_lz_match_cost_fast, + &state->lru); } static void @@ -873,7 +860,7 @@ lzms_get_lz_match_cost(struct lzms_compressor *ctx, cost += lzms_rc_bit_cost(&ctx->lz_match_range_encoder, &state->lz_match_state, 0); - cost += lzms_value_cost(&ctx->lz_offset_encoder, offset); + cost += lzms_offset_cost(&ctx->lz_offset_encoder, offset); } else { int i; @@ -895,7 +882,7 @@ lzms_get_lz_match_cost(struct lzms_compressor *ctx, state->lru.recent_offsets[i] = state->lru.recent_offsets[i + 1]; } - cost += lzms_value_cost(&ctx->length_encoder, length); + cost += lzms_length_cost(&ctx->length_encoder, length); state->lru.upcoming_offset = offset; lzms_update_lz_lru_queues(&state->lru); @@ -980,12 +967,10 @@ lzms_init_range_encoder(struct lzms_range_encoder *enc, static void lzms_init_huffman_encoder(struct lzms_huffman_encoder *enc, struct lzms_output_bitstream *os, - const u32 *slot_base_tab, unsigned num_syms, unsigned rebuild_freq) { enc->os = os; - enc->slot_base_tab = slot_base_tab; enc->num_syms_written = 0; enc->rebuild_freq = rebuild_freq; enc->num_syms = num_syms; @@ -1028,23 +1013,23 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen, /* Initialize Huffman encoders for each alphabet used in the compressed * representation. */ lzms_init_huffman_encoder(&ctx->literal_encoder, &ctx->os, - NULL, LZMS_NUM_LITERAL_SYMS, + LZMS_NUM_LITERAL_SYMS, LZMS_LITERAL_CODE_REBUILD_FREQ); lzms_init_huffman_encoder(&ctx->lz_offset_encoder, &ctx->os, - lzms_position_slot_base, num_position_slots, + num_position_slots, LZMS_LZ_OFFSET_CODE_REBUILD_FREQ); lzms_init_huffman_encoder(&ctx->length_encoder, &ctx->os, - lzms_length_slot_base, LZMS_NUM_LEN_SYMS, + LZMS_NUM_LEN_SYMS, LZMS_LENGTH_CODE_REBUILD_FREQ); lzms_init_huffman_encoder(&ctx->delta_offset_encoder, &ctx->os, - lzms_position_slot_base, num_position_slots, + num_position_slots, LZMS_DELTA_OFFSET_CODE_REBUILD_FREQ); lzms_init_huffman_encoder(&ctx->delta_power_encoder, &ctx->os, - NULL, LZMS_NUM_DELTA_POWER_SYMS, + LZMS_NUM_DELTA_POWER_SYMS, LZMS_DELTA_POWER_CODE_REBUILD_FREQ); /* Initialize range encoders, all of which wrap around the same @@ -1070,7 +1055,7 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen, &ctx->rc, LZMS_NUM_DELTA_REPEAT_MATCH_STATES); /* Initialize LRU match information. */ - lzms_init_lru_queues(&ctx->lru); + lzms_init_lru_queues(&ctx->lru); } /* Flush the output streams, prepare the final compressed data, and return its @@ -1162,8 +1147,9 @@ lzms_compress(const void *uncompressed_data, size_t uncompressed_size, /* Compute and encode a literal/match sequence that decompresses to the * preprocessed data. */ +#if 1 lzms_normal_encode(ctx); -#if 0 +#else lzms_fast_encode(ctx); #endif @@ -1233,6 +1219,7 @@ lzms_free_compressor(void *_ctx) #if 0 FREE(ctx->prev_tab); #endif + FREE(ctx->matches); lz_sarray_destroy(&ctx->lz_sarray); lz_match_chooser_destroy(&ctx->mc); FREE(ctx); @@ -1289,6 +1276,13 @@ lzms_create_compressor(size_t max_block_size, goto oom; #endif + ctx->matches = MALLOC(min(params->max_match_length - + params->min_match_length + 1, + params->max_matches_per_pos) * + sizeof(ctx->matches[0])); + if (ctx->matches == NULL) + goto oom; + if (!lz_sarray_init(&ctx->lz_sarray, max_block_size, params->min_match_length, params->max_match_length, @@ -1302,8 +1296,8 @@ lzms_create_compressor(size_t max_block_size, params->max_match_length)) goto oom; - /* Initialize position and length slot bases if not done already. */ - lzms_init_slot_bases(); + /* Initialize position and length slot data if not done already. */ + lzms_init_slots(); /* Initialize range encoding cost table if not done already. */ lzms_init_rc_costs(); diff --git a/src/lzms-decompress.c b/src/lzms-decompress.c index 83629008..32ce082c 100644 --- a/src/lzms-decompress.c +++ b/src/lzms-decompress.c @@ -194,7 +194,6 @@ #include "wimlib/compress_common.h" #include "wimlib/decompressor_ops.h" #include "wimlib/decompress_common.h" -#include "wimlib/error.h" #include "wimlib/lzms.h" #include "wimlib/util.h" @@ -284,6 +283,8 @@ struct lzms_huffman_decoder { * read using this decoder. */ const u32 *slot_base_tab; + const u8 *extra_bits_tab; + /* Number of symbols that have been read using this code far. Reset to * 0 whenever the code is rebuilt. */ u32 num_syms_read; @@ -628,19 +629,19 @@ lzms_decode_value(struct lzms_huffman_decoder *dec) unsigned num_extra_bits; u32 extra_bits; + LZMS_ASSERT(dec->slot_base_tab != NULL); + LZMS_ASSERT(dec->extra_bits_tab != NULL); + /* Read the slot (position slot, length slot, etc.), which is encoded as * a Huffman symbol. */ slot = lzms_huffman_decode_symbol(dec); - LZMS_ASSERT(dec->slot_base_tab != NULL); - /* Get the number of extra bits needed to represent the range of values * that share the slot. */ - num_extra_bits = bsr32(dec->slot_base_tab[slot + 1] - - dec->slot_base_tab[slot]); + num_extra_bits = dec->extra_bits_tab[slot]; - /* Read the number of extra bits and add them to the slot to form the - * final decoded value. */ + /* Read the number of extra bits and add them to the slot base to form + * the final decoded value. */ extra_bits = lzms_input_bitstream_read_bits(dec->is, num_extra_bits); return dec->slot_base_tab[slot] + extra_bits; } @@ -852,11 +853,14 @@ lzms_init_range_decoder(struct lzms_range_decoder *dec, static void lzms_init_huffman_decoder(struct lzms_huffman_decoder *dec, struct lzms_input_bitstream *is, - const u32 *slot_base_tab, unsigned num_syms, + const u32 *slot_base_tab, + const u8 *extra_bits_tab, + unsigned num_syms, unsigned rebuild_freq) { dec->is = is; dec->slot_base_tab = slot_base_tab; + dec->extra_bits_tab = extra_bits_tab; dec->num_syms = num_syms; dec->num_syms_read = rebuild_freq; dec->rebuild_freq = rebuild_freq; @@ -895,23 +899,29 @@ lzms_init_decompressor(struct lzms_decompressor *ctx, /* Initialize Huffman decoders for each alphabet used in the compressed * representation. */ lzms_init_huffman_decoder(&ctx->literal_decoder, &ctx->is, - NULL, LZMS_NUM_LITERAL_SYMS, + NULL, NULL, LZMS_NUM_LITERAL_SYMS, LZMS_LITERAL_CODE_REBUILD_FREQ); lzms_init_huffman_decoder(&ctx->lz_offset_decoder, &ctx->is, - lzms_position_slot_base, num_position_slots, + lzms_position_slot_base, + lzms_extra_position_bits, + num_position_slots, LZMS_LZ_OFFSET_CODE_REBUILD_FREQ); lzms_init_huffman_decoder(&ctx->length_decoder, &ctx->is, - lzms_length_slot_base, LZMS_NUM_LEN_SYMS, + lzms_length_slot_base, + lzms_extra_length_bits, + LZMS_NUM_LEN_SYMS, LZMS_LENGTH_CODE_REBUILD_FREQ); lzms_init_huffman_decoder(&ctx->delta_offset_decoder, &ctx->is, - lzms_position_slot_base, num_position_slots, + lzms_position_slot_base, + lzms_extra_position_bits, + num_position_slots, LZMS_DELTA_OFFSET_CODE_REBUILD_FREQ); lzms_init_huffman_decoder(&ctx->delta_power_decoder, &ctx->is, - NULL, LZMS_NUM_DELTA_POWER_SYMS, + NULL, NULL, LZMS_NUM_DELTA_POWER_SYMS, LZMS_DELTA_POWER_CODE_REBUILD_FREQ); @@ -1031,8 +1041,8 @@ lzms_create_decompressor(size_t max_block_size, if (ctx == NULL) return WIMLIB_ERR_NOMEM; - /* Initialize position and length slot bases if not done already. */ - lzms_init_slot_bases(); + /* Initialize position and length slot data if not done already. */ + lzms_init_slots(); *ctx_ret = ctx; return 0; -- 2.43.0