LZMS: Accelerate slot-finding
authorEric Biggers <ebiggers3@gmail.com>
Thu, 2 Jan 2014 04:54:03 +0000 (22:54 -0600)
committerEric Biggers <ebiggers3@gmail.com>
Thu, 2 Jan 2014 04:54:03 +0000 (22:54 -0600)
include/wimlib/lzms.h
src/lzms-common.c
src/lzms-compress.c
src/lzms-decompress.c

index ff3888e..798b339 100644 (file)
@@ -8,6 +8,8 @@
 #ifdef ENABLE_LZMS_DEBUG
 #      define LZMS_DEBUG DEBUG
 #       define LZMS_ASSERT wimlib_assert
+#       include "wimlib/assert.h"
+#       include "wimlib/error.h"
 #else
 #      define LZMS_DEBUG(format, ...)
 #      define LZMS_ASSERT(...)
@@ -47,7 +49,7 @@
 
 /* Code shared between the LZMS decompressor and compressor.  */
 
-#include <wimlib/types.h>
+#include <wimlib/util.h>
 
 extern void
 lzms_x86_filter(u8 data[], s32 size, s32 last_target_usages[], bool undo);
@@ -103,26 +105,52 @@ struct lzms_lru_queues {
 
 extern u32 lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1];
 
+extern u8 lzms_extra_position_bits[LZMS_MAX_NUM_OFFSET_SYMS];
+
+extern u16 lzms_order_to_position_slot_bounds[30][2];
+
 extern u32 lzms_length_slot_base[LZMS_NUM_LEN_SYMS + 1];
 
+#define LZMS_NUM_FAST_LENGTHS 1024
+extern u8 lzms_length_slot_fast[LZMS_NUM_FAST_LENGTHS];
+
+extern u8 lzms_extra_length_bits[LZMS_NUM_LEN_SYMS];
+
 extern void
-lzms_init_slot_bases(void);
+lzms_init_slots(void);
 
+/* Return the slot for the specified value.  */
 extern u32
-lzms_get_slot(u32 value, const u32 slot_base_tab[], unsigned num_slots);
+lzms_get_slot(u32 value, const u32 slot_base_tab[], u32 num_slots);
 
 static inline u32
-lzms_get_position_slot(u32 value)
+lzms_get_position_slot(u32 position)
 {
-       return lzms_get_slot(value, lzms_position_slot_base,
-                            LZMS_MAX_NUM_OFFSET_SYMS);
+       u32 order = bsr32(position);
+       u32 l = lzms_order_to_position_slot_bounds[order][0];
+       u32 r = lzms_order_to_position_slot_bounds[order][1];
+
+       for (;;) {
+               u32 slot = (l + r) / 2;
+               if (position >= lzms_position_slot_base[slot]) {
+                       if (position < lzms_position_slot_base[slot + 1])
+                               return slot;
+                       else
+                               l = slot + 1;
+               } else {
+                       r = slot - 1;
+               }
+       }
 }
 
 static inline u32
-lzms_get_length_slot(u32 value)
+lzms_get_length_slot(u32 length)
 {
-       return lzms_get_slot(value, lzms_length_slot_base,
-                            LZMS_NUM_LEN_SYMS);
+       if (likely(length < LZMS_NUM_FAST_LENGTHS))
+               return lzms_length_slot_fast[length];
+       else
+               return lzms_get_slot(length, lzms_length_slot_base,
+                                    LZMS_NUM_LEN_SYMS);
 }
 
 extern void
index 3609e5a..92f28c2 100644 (file)
 #endif
 
 #include "wimlib/endianness.h"
-#include "wimlib/error.h"
 #include "wimlib/lzms.h"
 #include "wimlib/util.h"
 
 #include <pthread.h>
 
-/* A table that maps position slots to their base values.  These are constants
- * computed at runtime by lzms_compute_slot_bases().  */
+/***************************************************************
+ * Constant tables initialized by lzms_compute_slots():        *
+ ***************************************************************/
+
+/* Table: position slot => position slot base value  */
 u32 lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS + 1];
 
-/* A table that maps length slots to their base values.  These are constants
- * computed at runtime by lzms_compute_slot_bases().  */
+/* Table: position slot => number of extra position bits  */
+u8 lzms_extra_position_bits[LZMS_MAX_NUM_OFFSET_SYMS];
+
+/* Table: log2(position) => [lower bound, upper bound] on position slot  */
+u16 lzms_order_to_position_slot_bounds[30][2];
+
+/* Table: length slot => length slot base value  */
 u32 lzms_length_slot_base[LZMS_NUM_LEN_SYMS + 1];
 
-/* Return the slot for the specified value.  */
-unsigned
-lzms_get_slot(u32 value, const u32 slot_base_tab[], unsigned num_slots)
-{
-       /* TODO:  Speed this up.  */
-       unsigned slot = 0;
+/* Table: length slot => number of extra length bits  */
+u8 lzms_extra_length_bits[LZMS_NUM_LEN_SYMS];
 
-       while (slot_base_tab[slot + 1] <= value)
-               slot++;
+/* Table: length (< LZMS_NUM_FAST_LENGTHS only) => length slot  */
+u8 lzms_length_slot_fast[LZMS_NUM_FAST_LENGTHS];
 
-       return slot;
+u32
+lzms_get_slot(u32 value, const u32 slot_base_tab[], unsigned num_slots)
+{
+       u32 l = 0;
+       u32 r = num_slots - 1;
+       for (;;) {
+               LZMS_ASSERT(r >= l);
+               u32 slot = (l + r) / 2;
+               if (value >= slot_base_tab[slot]) {
+                       if (value < slot_base_tab[slot + 1])
+                               return slot;
+                       else
+                               l = slot + 1;
+               } else {
+                       r = slot - 1;
+               }
+       }
 }
 
-
 static void
 lzms_decode_delta_rle_slot_bases(u32 slot_bases[],
-                                const u8 delta_run_lens[], size_t num_run_lens)
+                                u8 extra_bits[],
+                                const u8 delta_run_lens[],
+                                u32 num_run_lens,
+                                u32 final,
+                                u32 expected_num_slots)
 {
+       u32 order = 0;
        u32 delta = 1;
        u32 base = 0;
-       size_t slot = 0;
-       for (size_t i = 0; i < num_run_lens; i++) {
+       u32 slot = 0;
+       for (u32 i = 0; i < num_run_lens; i++) {
                u8 run_len = delta_run_lens[i];
                while (run_len--) {
                        base += delta;
-                       slot_bases[slot++] = base;
+                       if (slot > 0)
+                               extra_bits[slot - 1] = order;
+                       slot_bases[slot] = base;
+                       slot++;
                }
                delta <<= 1;
+               order++;
        }
+       LZMS_ASSERT(slot == expected_num_slots);
+
+       slot_bases[slot] = final;
+       extra_bits[slot - 1] = bsr32(slot_bases[slot] - slot_bases[slot - 1]);
 }
 
 /* Initialize the global position and length slot tables.  */
 static void
-lzms_compute_slot_bases(void)
+lzms_compute_slots(void)
 {
        /* If an explicit formula that maps LZMS position and length slots to
         * slot bases exists, then it could be used here.  But until one is
@@ -96,31 +127,52 @@ lzms_compute_slot_bases(void)
                1,
        };
 
+       /* Position slots  */
        lzms_decode_delta_rle_slot_bases(lzms_position_slot_base,
+                                        lzms_extra_position_bits,
                                         position_slot_delta_run_lens,
-                                        ARRAY_LEN(position_slot_delta_run_lens));
-
-       lzms_position_slot_base[LZMS_MAX_NUM_OFFSET_SYMS] = 0x7fffffff;
+                                        ARRAY_LEN(position_slot_delta_run_lens),
+                                        0x7fffffff,
+                                        LZMS_MAX_NUM_OFFSET_SYMS);
+
+       for (u32 order = 0; order < 30; order++) {
+               lzms_order_to_position_slot_bounds[order][0] =
+                       lzms_get_slot(1U << order, lzms_position_slot_base,
+                                     LZMS_MAX_NUM_OFFSET_SYMS);
+               lzms_order_to_position_slot_bounds[order][1] =
+                       lzms_get_slot((1U << (order + 1)) - 1, lzms_position_slot_base,
+                                     LZMS_MAX_NUM_OFFSET_SYMS);
+       }
 
+       /* Length slots  */
        lzms_decode_delta_rle_slot_bases(lzms_length_slot_base,
+                                        lzms_extra_length_bits,
                                         length_slot_delta_run_lens,
-                                        ARRAY_LEN(length_slot_delta_run_lens));
-
-       lzms_length_slot_base[LZMS_NUM_LEN_SYMS] = 0x400108ab;
+                                        ARRAY_LEN(length_slot_delta_run_lens),
+                                        0x400108ab,
+                                        LZMS_NUM_LEN_SYMS);
+
+       /* Create table mapping short lengths to length slots.  */
+       for (u32 slot = 0, i = 0; i < LZMS_NUM_FAST_LENGTHS; i++) {
+               if (i >= lzms_length_slot_base[slot + 1])
+                       slot++;
+               lzms_length_slot_fast[i] = slot;
+       }
 }
 
-/* Initialize the global position length slot tables if not done so already.  */
+/* Initialize the global position and length slot tables if not done so already.
+ * */
 void
-lzms_init_slot_bases(void)
+lzms_init_slots(void)
 {
+       static bool done = false;
        static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-       static bool already_computed = false;
 
-       if (unlikely(!already_computed)) {
+       if (unlikely(!done)) {
                pthread_mutex_lock(&mutex);
-               if (!already_computed) {
-                       lzms_compute_slot_bases();
-                       already_computed = true;
+               if (!done) {
+                       lzms_compute_slots();
+                       done = true;
                }
                pthread_mutex_unlock(&mutex);
        }
@@ -261,7 +313,7 @@ lzms_x86_filter(u8 data[restrict],
 static void
 lzms_init_lz_lru_queues(struct lzms_lz_lru_queues *lz)
 {
-        /* Recent offsets for LZ matches  */
+       /* Recent offsets for LZ matches  */
        for (u32 i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++)
                lz->recent_offsets[i] = i + 1;
 
@@ -272,7 +324,7 @@ lzms_init_lz_lru_queues(struct lzms_lz_lru_queues *lz)
 static void
 lzms_init_delta_lru_queues(struct lzms_delta_lru_queues *delta)
 {
-        /* Recent offsets and powers for LZ matches  */
+       /* Recent offsets and powers for LZ matches  */
        for (u32 i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++) {
                delta->recent_offsets[i] = i + 1;
                delta->recent_powers[i] = 0;
@@ -287,8 +339,8 @@ lzms_init_delta_lru_queues(struct lzms_delta_lru_queues *delta)
 void
 lzms_init_lru_queues(struct lzms_lru_queues *lru)
 {
-        lzms_init_lz_lru_queues(&lru->lz);
-        lzms_init_delta_lru_queues(&lru->delta);
+       lzms_init_lz_lru_queues(&lru->lz);
+       lzms_init_delta_lru_queues(&lru->delta);
 }
 
 void
@@ -321,6 +373,6 @@ lzms_update_delta_lru_queues(struct lzms_delta_lru_queues *delta)
 void
 lzms_update_lru_queues(struct lzms_lru_queues *lru)
 {
-        lzms_update_lz_lru_queues(&lru->lz);
-        lzms_update_delta_lru_queues(&lru->delta);
+       lzms_update_lz_lru_queues(&lru->lz);
+       lzms_update_delta_lru_queues(&lru->delta);
 }
index 6df1080..31648f0 100644 (file)
@@ -33,7 +33,6 @@
 #endif
 
 #include "wimlib.h"
-#include "wimlib/assert.h"
 #include "wimlib/compiler.h"
 #include "wimlib/compressor_ops.h"
 #include "wimlib/compress_common.h"
@@ -140,9 +139,7 @@ struct lzms_range_encoder {
        struct lzms_probability_entry prob_entries[LZMS_MAX_NUM_STATES];
 };
 
-/* Structure used for Huffman encoding, optionally encoding larger "values" as a
- * Huffman symbol specifying a slot and a slot-dependent number of extra bits.
- * */
+/* Structure used for Huffman encoding.  */
 struct lzms_huffman_encoder {
 
        /* Bitstream to write Huffman-encoded symbols and verbatim bits to.
@@ -150,9 +147,6 @@ struct lzms_huffman_encoder {
         */
        struct lzms_output_bitstream *os;
 
-       /* Pointer to the slot base table to use.  */
-       const u32 *slot_base_tab;
-
        /* Number of symbols that have been written using this code far.  Reset
         * to 0 whenever the code is rebuilt.  */
        u32 num_syms_written;
@@ -195,7 +189,8 @@ struct lzms_compressor {
        /* Suffix array match-finder.  */
        struct lz_sarray lz_sarray;
 
-       struct raw_match matches[64];
+       /* Temporary space to store found matches.  */
+       struct raw_match *matches;
 
        /* Match-chooser.  */
        struct lz_match_chooser mc;
@@ -460,29 +455,36 @@ lzms_huffman_encode_symbol(struct lzms_huffman_encoder *enc, u32 sym)
        }
 }
 
-/* Encode a number as a Huffman symbol specifying a slot, plus a number of
- * slot-dependent extra bits.  */
 static void
-lzms_encode_value(struct lzms_huffman_encoder *enc, u32 value)
+lzms_encode_length(struct lzms_huffman_encoder *enc, u32 length)
 {
        unsigned slot;
        unsigned num_extra_bits;
        u32 extra_bits;
 
-       LZMS_ASSERT(enc->slot_base_tab != NULL);
+       slot = lzms_get_length_slot(length);
 
-       slot = lzms_get_slot(value, enc->slot_base_tab, enc->num_syms);
+       num_extra_bits = lzms_extra_length_bits[slot];
 
-       /* Get the number of extra bits needed to represent the range of values
-        * that share the slot.  */
-       num_extra_bits = bsr32(enc->slot_base_tab[slot + 1] -
-                              enc->slot_base_tab[slot]);
+       extra_bits = length - lzms_length_slot_base[slot];
 
-       /* Calculate the extra bits as the offset from the slot base.  */
-       extra_bits = value - enc->slot_base_tab[slot];
+       lzms_huffman_encode_symbol(enc, slot);
+       lzms_output_bitstream_put_bits(enc->os, extra_bits, num_extra_bits);
+}
+
+static void
+lzms_encode_offset(struct lzms_huffman_encoder *enc, u32 offset)
+{
+       unsigned slot;
+       unsigned num_extra_bits;
+       u32 extra_bits;
+
+       slot = lzms_get_position_slot(offset);
+
+       num_extra_bits = lzms_extra_position_bits[slot];
+
+       extra_bits = offset - lzms_position_slot_base[slot];
 
-       /* Output the slot (Huffman-encoded), then the extra bits (verbatim).
-        */
        lzms_huffman_encode_symbol(enc, slot);
        lzms_output_bitstream_put_bits(enc->os, extra_bits, num_extra_bits);
 }
@@ -558,7 +560,7 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset)
                lzms_range_encode_bit(&ctx->lz_match_range_encoder, 0);
 
                /* Encode the match offset.  */
-               lzms_encode_value(&ctx->lz_offset_encoder, offset);
+               lzms_encode_offset(&ctx->lz_offset_encoder, offset);
        } else {
                int i;
 
@@ -583,7 +585,7 @@ lzms_encode_lz_match(struct lzms_compressor *ctx, u32 length, u32 offset)
        }
 
        /* Encode the match length.  */
-       lzms_encode_value(&ctx->length_encoder, length);
+       lzms_encode_length(&ctx->length_encoder, length);
 
        /* Save the match offset for later insertion at the front of the LZ
         * match offset LRU queue.  */
@@ -654,7 +656,7 @@ lzms_lz_match_cost_fast(input_idx_t length, input_idx_t offset, const void *_lru
 /*#define LZMS_RC_COSTS_USE_FLOATING_POINT*/
 
 static u32
-lzms_rc_costs[LZMS_PROBABILITY_MAX];
+lzms_rc_costs[LZMS_PROBABILITY_MAX + 1];
 
 #ifdef LZMS_RC_COSTS_USE_FLOATING_POINT
 #  include <math.h>
@@ -684,7 +686,7 @@ lzms_do_init_rc_costs(void)
         * really interpreted as 1 / 64 and 64 / 64 is really interpreted as
         * 63 / 64.
         */
-       for (u32 i = 0; i < LZMS_PROBABILITY_MAX; i++) {
+       for (u32 i = 0; i <= LZMS_PROBABILITY_MAX; i++) {
                u32 prob = i;
 
                if (prob == 0)
@@ -718,7 +720,7 @@ lzms_init_rc_costs(void)
        static bool done = false;
        static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
 
-       if (!done) {
+       if (unlikely(!done)) {
                pthread_mutex_lock(&mutex);
                if (!done) {
                        lzms_do_init_rc_costs();
@@ -747,11 +749,6 @@ lzms_rc_bit_cost(const struct lzms_range_encoder *enc, u8 *cur_state, int bit)
 
        *cur_state = (*cur_state << 1) | bit;
 
-       if (prob_zero == 0)
-               prob_zero = 1;
-       else if (prob_zero == LZMS_PROBABILITY_MAX)
-               prob_zero = LZMS_PROBABILITY_MAX - 1;
-
        if (bit == 0)
                prob_correct = prob_zero;
        else
@@ -766,20 +763,18 @@ lzms_huffman_symbol_cost(const struct lzms_huffman_encoder *enc, u32 sym)
        return enc->lens[sym] << LZMS_COST_SHIFT;
 }
 
-/* Compute the cost to encode a number with lzms_encode_value().  */
 static u32
-lzms_value_cost(const struct lzms_huffman_encoder *enc, u32 value)
+lzms_offset_cost(const struct lzms_huffman_encoder *enc, u32 offset)
 {
        u32 slot;
        u32 num_extra_bits;
        u32 cost = 0;
 
-       slot = lzms_get_slot(value, enc->slot_base_tab, enc->num_syms);
+       slot = lzms_get_position_slot(offset);
 
        cost += lzms_huffman_symbol_cost(enc, slot);
 
-       num_extra_bits = bsr32(enc->slot_base_tab[slot + 1] -
-                              enc->slot_base_tab[slot]);
+       num_extra_bits = lzms_extra_position_bits[slot];
 
        cost += num_extra_bits << LZMS_COST_SHIFT;
 
@@ -787,41 +782,33 @@ lzms_value_cost(const struct lzms_huffman_encoder *enc, u32 value)
 }
 
 static u32
-lzms_get_matches(struct lzms_compressor *ctx,
-                const struct lzms_adaptive_state *state,
-                struct raw_match **matches_ret)
+lzms_length_cost(const struct lzms_huffman_encoder *enc, u32 length)
 {
-       u32 num_matches;
-       struct raw_match *matches = ctx->matches;
+       u32 slot;
+       u32 num_extra_bits;
+       u32 cost = 0;
 
-       num_matches = lz_sarray_get_matches(&ctx->lz_sarray,
-                                           matches,
-                                           lzms_lz_match_cost_fast,
-                                           &state->lru);
-#if 0
-       fprintf(stderr, "Pos %u: %u matches\n",
-               lz_sarray_get_pos(&ctx->lz_sarray) - 1, num_matches);
-       for (u32 i = 0; i < num_matches; i++)
-               fprintf(stderr, "\tLen %u Offset %u\n", matches[i].len, matches[i].offset);
-#endif
+       slot = lzms_get_length_slot(length);
 
-#ifdef ENABLE_LZMS_DEBUG
-       LZMS_ASSERT(lz_sarray_get_pos(&ctx->lz_sarray) > 0);
-       u32 curpos = lz_sarray_get_pos(&ctx->lz_sarray) - 1;
-       for (u32 i = 0; i < num_matches; i++) {
-               LZMS_ASSERT(matches[i].len <= ctx->window_size - curpos);
-               LZMS_ASSERT(matches[i].offset > 0);
-               LZMS_ASSERT(matches[i].offset <= curpos);
-               LZMS_ASSERT(!memcmp(&ctx->window[curpos],
-                                   &ctx->window[curpos - matches[i].offset],
-                                   matches[i].len));
-               if (i < num_matches - 1)
-                       LZMS_ASSERT(matches[i].len > matches[i + 1].len);
+       cost += lzms_huffman_symbol_cost(enc, slot);
 
-       }
-#endif
-       *matches_ret = matches;
-       return num_matches;
+       num_extra_bits = lzms_extra_length_bits[slot];
+
+       cost += num_extra_bits << LZMS_COST_SHIFT;
+
+       return cost;
+}
+
+static u32
+lzms_get_matches(struct lzms_compressor *ctx,
+                const struct lzms_adaptive_state *state,
+                struct raw_match **matches_ret)
+{
+       *matches_ret = ctx->matches;
+       return lz_sarray_get_matches(&ctx->lz_sarray,
+                                    ctx->matches,
+                                    lzms_lz_match_cost_fast,
+                                    &state->lru);
 }
 
 static void
@@ -873,7 +860,7 @@ lzms_get_lz_match_cost(struct lzms_compressor *ctx,
                cost += lzms_rc_bit_cost(&ctx->lz_match_range_encoder,
                                         &state->lz_match_state, 0);
 
-               cost += lzms_value_cost(&ctx->lz_offset_encoder, offset);
+               cost += lzms_offset_cost(&ctx->lz_offset_encoder, offset);
        } else {
                int i;
 
@@ -895,7 +882,7 @@ lzms_get_lz_match_cost(struct lzms_compressor *ctx,
                        state->lru.recent_offsets[i] = state->lru.recent_offsets[i + 1];
        }
 
-       cost += lzms_value_cost(&ctx->length_encoder, length);
+       cost += lzms_length_cost(&ctx->length_encoder, length);
 
        state->lru.upcoming_offset = offset;
        lzms_update_lz_lru_queues(&state->lru);
@@ -980,12 +967,10 @@ lzms_init_range_encoder(struct lzms_range_encoder *enc,
 static void
 lzms_init_huffman_encoder(struct lzms_huffman_encoder *enc,
                          struct lzms_output_bitstream *os,
-                         const u32 *slot_base_tab,
                          unsigned num_syms,
                          unsigned rebuild_freq)
 {
        enc->os = os;
-       enc->slot_base_tab = slot_base_tab;
        enc->num_syms_written = 0;
        enc->rebuild_freq = rebuild_freq;
        enc->num_syms = num_syms;
@@ -1028,23 +1013,23 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen,
        /* Initialize Huffman encoders for each alphabet used in the compressed
         * representation.  */
        lzms_init_huffman_encoder(&ctx->literal_encoder, &ctx->os,
-                                 NULL, LZMS_NUM_LITERAL_SYMS,
+                                 LZMS_NUM_LITERAL_SYMS,
                                  LZMS_LITERAL_CODE_REBUILD_FREQ);
 
        lzms_init_huffman_encoder(&ctx->lz_offset_encoder, &ctx->os,
-                                 lzms_position_slot_base, num_position_slots,
+                                 num_position_slots,
                                  LZMS_LZ_OFFSET_CODE_REBUILD_FREQ);
 
        lzms_init_huffman_encoder(&ctx->length_encoder, &ctx->os,
-                                 lzms_length_slot_base, LZMS_NUM_LEN_SYMS,
+                                 LZMS_NUM_LEN_SYMS,
                                  LZMS_LENGTH_CODE_REBUILD_FREQ);
 
        lzms_init_huffman_encoder(&ctx->delta_offset_encoder, &ctx->os,
-                                 lzms_position_slot_base, num_position_slots,
+                                 num_position_slots,
                                  LZMS_DELTA_OFFSET_CODE_REBUILD_FREQ);
 
        lzms_init_huffman_encoder(&ctx->delta_power_encoder, &ctx->os,
-                                 NULL, LZMS_NUM_DELTA_POWER_SYMS,
+                                 LZMS_NUM_DELTA_POWER_SYMS,
                                  LZMS_DELTA_POWER_CODE_REBUILD_FREQ);
 
        /* Initialize range encoders, all of which wrap around the same
@@ -1070,7 +1055,7 @@ lzms_init_compressor(struct lzms_compressor *ctx, const u8 *udata, u32 ulen,
                                        &ctx->rc, LZMS_NUM_DELTA_REPEAT_MATCH_STATES);
 
        /* Initialize LRU match information.  */
-        lzms_init_lru_queues(&ctx->lru);
+       lzms_init_lru_queues(&ctx->lru);
 }
 
 /* Flush the output streams, prepare the final compressed data, and return its
@@ -1162,8 +1147,9 @@ lzms_compress(const void *uncompressed_data, size_t uncompressed_size,
 
        /* Compute and encode a literal/match sequence that decompresses to the
         * preprocessed data.  */
+#if 1
        lzms_normal_encode(ctx);
-#if 0
+#else
        lzms_fast_encode(ctx);
 #endif
 
@@ -1233,6 +1219,7 @@ lzms_free_compressor(void *_ctx)
 #if 0
                FREE(ctx->prev_tab);
 #endif
+               FREE(ctx->matches);
                lz_sarray_destroy(&ctx->lz_sarray);
                lz_match_chooser_destroy(&ctx->mc);
                FREE(ctx);
@@ -1289,6 +1276,13 @@ lzms_create_compressor(size_t max_block_size,
                goto oom;
 #endif
 
+       ctx->matches = MALLOC(min(params->max_match_length -
+                                       params->min_match_length + 1,
+                                 params->max_matches_per_pos) *
+                               sizeof(ctx->matches[0]));
+       if (ctx->matches == NULL)
+               goto oom;
+
        if (!lz_sarray_init(&ctx->lz_sarray, max_block_size,
                            params->min_match_length,
                            params->max_match_length,
@@ -1302,8 +1296,8 @@ lzms_create_compressor(size_t max_block_size,
                                   params->max_match_length))
                goto oom;
 
-       /* Initialize position and length slot bases if not done already.  */
-       lzms_init_slot_bases();
+       /* Initialize position and length slot data if not done already.  */
+       lzms_init_slots();
 
        /* Initialize range encoding cost table if not done already.  */
        lzms_init_rc_costs();
index 8362900..32ce082 100644 (file)
 #include "wimlib/compress_common.h"
 #include "wimlib/decompressor_ops.h"
 #include "wimlib/decompress_common.h"
-#include "wimlib/error.h"
 #include "wimlib/lzms.h"
 #include "wimlib/util.h"
 
@@ -284,6 +283,8 @@ struct lzms_huffman_decoder {
         * read using this decoder.  */
        const u32 *slot_base_tab;
 
+       const u8 *extra_bits_tab;
+
        /* Number of symbols that have been read using this code far.  Reset to
         * 0 whenever the code is rebuilt.  */
        u32 num_syms_read;
@@ -628,19 +629,19 @@ lzms_decode_value(struct lzms_huffman_decoder *dec)
        unsigned num_extra_bits;
        u32 extra_bits;
 
+       LZMS_ASSERT(dec->slot_base_tab != NULL);
+       LZMS_ASSERT(dec->extra_bits_tab != NULL);
+
        /* Read the slot (position slot, length slot, etc.), which is encoded as
         * a Huffman symbol.  */
        slot = lzms_huffman_decode_symbol(dec);
 
-       LZMS_ASSERT(dec->slot_base_tab != NULL);
-
        /* Get the number of extra bits needed to represent the range of values
         * that share the slot.  */
-       num_extra_bits = bsr32(dec->slot_base_tab[slot + 1] -
-                              dec->slot_base_tab[slot]);
+       num_extra_bits = dec->extra_bits_tab[slot];
 
-       /* Read the number of extra bits and add them to the slot to form the
-        * final decoded value.  */
+       /* Read the number of extra bits and add them to the slot base to form
+        * the final decoded value.  */
        extra_bits = lzms_input_bitstream_read_bits(dec->is, num_extra_bits);
        return dec->slot_base_tab[slot] + extra_bits;
 }
@@ -852,11 +853,14 @@ lzms_init_range_decoder(struct lzms_range_decoder *dec,
 static void
 lzms_init_huffman_decoder(struct lzms_huffman_decoder *dec,
                          struct lzms_input_bitstream *is,
-                         const u32 *slot_base_tab, unsigned num_syms,
+                         const u32 *slot_base_tab,
+                         const u8 *extra_bits_tab,
+                         unsigned num_syms,
                          unsigned rebuild_freq)
 {
        dec->is = is;
        dec->slot_base_tab = slot_base_tab;
+       dec->extra_bits_tab = extra_bits_tab;
        dec->num_syms = num_syms;
        dec->num_syms_read = rebuild_freq;
        dec->rebuild_freq = rebuild_freq;
@@ -895,23 +899,29 @@ lzms_init_decompressor(struct lzms_decompressor *ctx,
        /* Initialize Huffman decoders for each alphabet used in the compressed
         * representation.  */
        lzms_init_huffman_decoder(&ctx->literal_decoder, &ctx->is,
-                                 NULL, LZMS_NUM_LITERAL_SYMS,
+                                 NULL, NULL, LZMS_NUM_LITERAL_SYMS,
                                  LZMS_LITERAL_CODE_REBUILD_FREQ);
 
        lzms_init_huffman_decoder(&ctx->lz_offset_decoder, &ctx->is,
-                                 lzms_position_slot_base, num_position_slots,
+                                 lzms_position_slot_base,
+                                 lzms_extra_position_bits,
+                                 num_position_slots,
                                  LZMS_LZ_OFFSET_CODE_REBUILD_FREQ);
 
        lzms_init_huffman_decoder(&ctx->length_decoder, &ctx->is,
-                                 lzms_length_slot_base, LZMS_NUM_LEN_SYMS,
+                                 lzms_length_slot_base,
+                                 lzms_extra_length_bits,
+                                 LZMS_NUM_LEN_SYMS,
                                  LZMS_LENGTH_CODE_REBUILD_FREQ);
 
        lzms_init_huffman_decoder(&ctx->delta_offset_decoder, &ctx->is,
-                                 lzms_position_slot_base, num_position_slots,
+                                 lzms_position_slot_base,
+                                 lzms_extra_position_bits,
+                                 num_position_slots,
                                  LZMS_DELTA_OFFSET_CODE_REBUILD_FREQ);
 
        lzms_init_huffman_decoder(&ctx->delta_power_decoder, &ctx->is,
-                                 NULL, LZMS_NUM_DELTA_POWER_SYMS,
+                                 NULL, NULL, LZMS_NUM_DELTA_POWER_SYMS,
                                  LZMS_DELTA_POWER_CODE_REBUILD_FREQ);
 
 
@@ -1031,8 +1041,8 @@ lzms_create_decompressor(size_t max_block_size,
        if (ctx == NULL)
                return WIMLIB_ERR_NOMEM;
 
-       /* Initialize position and length slot bases if not done already.  */
-       lzms_init_slot_bases();
+       /* Initialize position and length slot data if not done already.  */
+       lzms_init_slots();
 
        *ctx_ret = ctx;
        return 0;