]> wimlib.net Git - wimlib/blobdiff - src/lzms-compress.c
LZMS: decompression optimizations
[wimlib] / src / lzms-compress.c
index da1904fc9f66cf8927db8a08cc31176a7e9c4c51..5f5e37412db828a0c1a96d667b9e854e4b5a0959 100644 (file)
@@ -32,6 +32,7 @@
 #include "wimlib/lz_mf.h"
 #include "wimlib/lz_repsearch.h"
 #include "wimlib/lzms.h"
+#include "wimlib/unaligned.h"
 #include "wimlib/util.h"
 
 #include <string.h>
@@ -205,6 +206,33 @@ struct lzms_compressor {
        u8 offset_slot_fast[LZMS_NUM_FAST_OFFSETS];
 };
 
+/* Least-recently-used queue of LZ match offsets.  recent_offsets[] holds the
+ * queued offsets (initialized to 1, 2, ..., LZMS_NUM_RECENT_OFFSETS + 1; see
+ * lzms_init_lz_lru_queue()).  prev_offset and upcoming_offset stage an offset
+ * so that it enters recent_offsets[] one update later than the position at
+ * which it was produced (see lzms_update_lz_lru_queue()).
+ * NOTE(review): presumably this delayed insertion mirrors the LZMS format's
+ * queue-update rule used by the decompressor — confirm against it.  */
+struct lzms_lz_lru_queue {
+       u32 recent_offsets[LZMS_NUM_RECENT_OFFSETS + 1];
+       u32 prev_offset;
+       u32 upcoming_offset;
+};
+
+/* Reset the LZ match offset LRU queue to its initial state: slot i holds the
+ * offset i + 1, and no offset is pending insertion (prev_offset and
+ * upcoming_offset are both 0, which lzms_update_lz_lru_queue() treats as
+ * "nothing staged").  */
+static void
+lzms_init_lz_lru_queue(struct lzms_lz_lru_queue *queue)
+{
+       for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS + 1; i++)
+               queue->recent_offsets[i] = i + 1;
+
+       queue->prev_offset = 0;
+       queue->upcoming_offset = 0;
+}
+
+/* Advance the LZ match offset LRU queue by one position.  If an offset was
+ * staged on the previous update (prev_offset != 0), push it onto the front of
+ * recent_offsets[], shifting the existing entries back by one slot (the entry
+ * in the last of the LZMS_NUM_RECENT_OFFSETS + 1 slots is discarded).  Then
+ * stage upcoming_offset — set by the caller to the offset just used, or 0 if
+ * none — to be inserted on the *next* update.  */
+static void
+lzms_update_lz_lru_queue(struct lzms_lz_lru_queue *queue)
+{
+       if (queue->prev_offset != 0) {
+               for (int i = LZMS_NUM_RECENT_OFFSETS - 1; i >= 0; i--)
+                       queue->recent_offsets[i + 1] = queue->recent_offsets[i];
+               queue->recent_offsets[0] = queue->prev_offset;
+       }
+       queue->prev_offset = queue->upcoming_offset;
+}
+
 /*
  * Match chooser position data:
  *
@@ -257,7 +285,7 @@ struct lzms_mc_pos_data {
         * entries or current Huffman codewords.  Those aren't maintained
        * per-position and are only updated occasionally.  */
        struct lzms_adaptive_state {
-               struct lzms_lz_lru_queues lru;
+               struct lzms_lz_lru_queue lru;
                u8 main_state;
                u8 match_state;
                u8 lz_match_state;
@@ -338,7 +366,7 @@ lzms_output_bitstream_put_varbits(struct lzms_output_bitstream *os,
 
                /* Write a coding unit, unless it would underflow the buffer. */
                if (os->next != os->begin)
-                       *--os->next = cpu_to_le16(os->bitbuf >> os->bitcount);
+                       put_unaligned_u16_le(os->bitbuf >> os->bitcount, --os->next);
 
                /* Optimization for call sites that never write more than 16
                 * bits at once.  */
@@ -357,7 +385,7 @@ lzms_output_bitstream_flush(struct lzms_output_bitstream *os)
                return false;
 
        if (os->bitcount != 0)
-               *--os->next = cpu_to_le16(os->bitbuf << (16 - os->bitcount));
+               put_unaligned_u16_le(os->bitbuf << (16 - os->bitcount), --os->next);
 
        return true;
 }
@@ -401,9 +429,11 @@ lzms_range_encoder_raw_shift_low(struct lzms_range_encoder_raw *rc)
                 * ((rc->low >> 32) != 0, a.k.a. the carry bit is 1).  */
                do {
                        if (likely(rc->next >= rc->begin)) {
-                               if (rc->next != rc->end)
-                                       *rc->next++ = cpu_to_le16(rc->cache +
-                                                                 (u16)(rc->low >> 32));
+                               if (rc->next != rc->end) {
+                                       put_unaligned_u16_le(rc->cache +
+                                                            (u16)(rc->low >> 32),
+                                                            rc->next++);
+                               }
                        } else {
                                rc->next++;
                        }
@@ -895,7 +925,7 @@ lzms_init_adaptive_state(struct lzms_adaptive_state *state)
 {
        unsigned i;
 
-       lzms_init_lz_lru_queues(&state->lru);
+       lzms_init_lz_lru_queue(&state->lru);
        state->main_state = 0;
        state->match_state = 0;
        state->lz_match_state = 0;
@@ -1231,10 +1261,7 @@ lzms_init_range_encoder(struct lzms_range_encoder *enc,
        enc->state = 0;
        LZMS_ASSERT(is_power_of_2(num_states));
        enc->mask = num_states - 1;
-       for (u32 i = 0; i < num_states; i++) {
-               enc->prob_entries[i].num_recent_zero_bits = LZMS_INITIAL_PROBABILITY;
-               enc->prob_entries[i].recent_bits = LZMS_INITIAL_RECENT_BITS;
-       }
+       lzms_init_probability_entries(enc->prob_entries, num_states);
 }
 
 static void
@@ -1288,7 +1315,7 @@ lzms_prepare_compressor(struct lzms_compressor *c, const u8 *udata, u32 ulen,
                                  LZMS_LZ_OFFSET_CODE_REBUILD_FREQ);
 
        lzms_init_huffman_encoder(&c->length_encoder, &c->os,
-                                 LZMS_NUM_LEN_SYMS,
+                                 LZMS_NUM_LENGTH_SYMS,
                                  LZMS_LENGTH_CODE_REBUILD_FREQ);
 
        lzms_init_huffman_encoder(&c->delta_offset_encoder, &c->os,
@@ -1486,8 +1513,6 @@ lzms_create_compressor(size_t max_block_size, unsigned int compression_level,
                goto oom;
        c->optimum_end = &c->optimum[params.optim_array_length];
 
-       lzms_init_slots();
-
        lzms_init_rc_costs();
 
        lzms_init_fast_slots(c);