Rename _full_path => d_full_path

[wimlib] / src / lzms_compress.c
diff --git a/src/lzms_compress.c b/src/lzms_compress.c

index abf39f5e89f194d5ff0fb087b28a3c4c73b57100..3a4647c44f87bd77140c496eb3ae5d351039a9ca 100644 (file)
--- a/src/lzms_compress.c
+++ b/src/lzms_compress.c
@@ -287,6 +287,10 @@ struct lzms_compressor {
          */
         bool use_delta_matches;
  
+       /* If true, the compressor need not preserve the input buffer if it
+        * compresses the data successfully.  */
+       bool destructive;
+
         /* 'last_target_usages' is a large array that is only needed for
          * preprocessing, so it is in union with fields that don't need to be
          * initialized until after preprocessing.  */
@@ -303,7 +307,8 @@ struct lzms_compressor {
         u32 next_delta_hashes[NUM_POWERS_TO_CONSIDER];
  
         /* The per-byte graph nodes for near-optimal parsing  */
-       struct lzms_optimum_node optimum_nodes[NUM_OPTIM_NODES + MAX_FAST_LENGTH];
+       struct lzms_optimum_node optimum_nodes[NUM_OPTIM_NODES + MAX_FAST_LENGTH +
+                                              1 + MAX_FAST_LENGTH];
  
         /* Table: length => current cost for small match lengths  */
         u32 fast_length_cost_tab[MAX_FAST_LENGTH + 1];
@@ -942,7 +947,7 @@ lzms_encode_item_list(struct lzms_compressor *c,
  }
  
  /******************************************************************************
- *                             Cost evalution                                 *
+ *                             Cost evaluation                                *
   ******************************************************************************/
  
  /*
@@ -1122,7 +1127,7 @@ lzms_update_lru_queues(struct lzms_adaptive_state *state)
  static inline void
  lzms_update_state(u8 *state_p, int bit, unsigned num_states)
  {
-       *state_p = ((*state_p << 1) | bit) % num_states;
+       *state_p = ((*state_p << 1) | bit) & (num_states - 1);
  }
  
  static inline void
@@ -1197,7 +1202,7 @@ lzms_init_delta_matchfinder(struct lzms_compressor *c)
   * delta context with the specified @span.
   */
  static inline u32
-lzms_delta_hash(const u8 *p, u32 span)
+lzms_delta_hash(const u8 *p, const u32 pos, u32 span)
  {
         /* A delta match has a certain span and an offset that is a multiple of
          * that span.  To reduce wasted space we use a single combined hash
@@ -1209,7 +1214,7 @@ lzms_delta_hash(const u8 *p, u32 span)
         u8 d0 = *(p + 0) - *(p + 0 - span);
         u8 d1 = *(p + 1) - *(p + 1 - span);
         u8 d2 = *(p + 2) - *(p + 2 - span);
-       u32 v = ((span + ((u32)(uintptr_t)p & (span - 1))) << 24) |
+       u32 v = ((span + (pos & (span - 1))) << 24) |
                 ((u32)d2 << 16) | ((u32)d1 << 8) | d0;
         return lz_hash(v, DELTA_HASH_ORDER);
  }
@@ -1245,7 +1250,7 @@ lzms_delta_matchfinder_skip_bytes(struct lzms_compressor *c,
                         const u32 span = (u32)1 << power;
                         if (unlikely(pos < span))
                                 continue;
-                       const u32 next_hash = lzms_delta_hash(in_next + 1, span);
+                       const u32 next_hash = lzms_delta_hash(in_next + 1, pos + 1, span);
                         const u32 hash = c->next_delta_hashes[power];
                         c->delta_hash_table[hash] =
                                 (power << DELTA_SOURCE_POWER_SHIFT) | pos;
@@ -1282,10 +1287,10 @@ lzms_skip_bytes(struct lzms_compressor *c, u32 count, const u8 *in_next)
   * can be reached using a match or literal from the current position.  This is
   * essentially Dijkstra's algorithm in disguise: the graph nodes are positions,
   * the graph edges are possible matches/literals to code, and the cost of each
- * edge is the estimated number of bits that will be required to output the
- * corresponding match or literal.  But one difference is that we actually
- * compute the lowest-cost path in pieces, where each piece is terminated when
- * there are no choices to be made.
+ * edge is the estimated number of bits (scaled up by COST_SHIFT) that will be
+ * required to output the corresponding match or literal.  But one difference is
+ * that we actually compute the lowest-cost path in pieces, where each piece is
+ * terminated when there are no choices to be made.
   *
   * The costs of literals and matches are estimated using the range encoder
   * states and the semi-adaptive Huffman codes.  Except for range encoding
@@ -1722,7 +1727,7 @@ begin:
                                 if (unlikely(pos < span))
                                         continue;
  
-                               const u32 next_hash = lzms_delta_hash(in_next + 1, span);
+                               const u32 next_hash = lzms_delta_hash(in_next + 1, pos + 1, span);
                                 const u32 hash = c->next_delta_hashes[power];
                                 const u32 cur_match = c->delta_hash_table[hash];
  
@@ -1755,11 +1760,16 @@ begin:
                                 /* Extend the delta match to its full length.  */
                                 const u32 len = lzms_extend_delta_match(in_next,
                                                                         matchptr,
-                                                                       3,
+                                                                       NBYTES_HASHED_FOR_DELTA,
                                                                         in_end - in_next,
                                                                         span);
  
                                 const u32 raw_offset = offset >> power;
+
+                               if (unlikely(raw_offset > DELTA_SOURCE_RAW_OFFSET_MASK -
+                                                         (LZMS_NUM_DELTA_REPS - 1)))
+                                       continue;
+
                                 const u32 pair = (power << DELTA_SOURCE_POWER_SHIFT) |
                                                  raw_offset;
                                 const u32 source = DELTA_SOURCE_TAG |
@@ -1885,7 +1895,7 @@ begin:
                  * Finalize the adaptive state that results from taking this
                  * lowest-cost path.  */
                 struct lzms_item item_to_take = cur_node->item;
-               struct lzms_optimum_node *source_node = cur_node - (item_to_take.length);
+               struct lzms_optimum_node *source_node = cur_node - item_to_take.length;
                 int next_item_idx = -1;
                 for (unsigned i = 0; i < cur_node->num_extra_items; i++) {
                         item_to_take = cur_node->extra_items[i];
@@ -1912,9 +1922,9 @@ begin:
  
                                         if (source >= LZMS_NUM_DELTA_REPS) {
                                                 /* Explicit offset delta match  */
-                                               u32 pair = source - (LZMS_NUM_DELTA_REPS - 1);
                                                 lzms_update_delta_state(&cur_node->state, 0);
-                                               cur_node->state.upcoming_delta_pair = pair;
+                                               cur_node->state.upcoming_delta_pair =
+                                                       source - (LZMS_NUM_DELTA_REPS - 1);
                                         } else {
                                                 /* Repeat offset delta match  */
                                                 int rep_idx = source;
@@ -2100,7 +2110,8 @@ lzms_finalize(struct lzms_compressor *c)
  }
  
  static u64
-lzms_get_needed_memory(size_t max_bufsize, unsigned compression_level)
+lzms_get_needed_memory(size_t max_bufsize, unsigned compression_level,
+                      bool destructive)
  {
         u64 size = 0;
  
@@ -2109,8 +2120,8 @@ lzms_get_needed_memory(size_t max_bufsize, unsigned compression_level)
  
         size += sizeof(struct lzms_compressor);
  
-       /* in_buffer */
-       size += max_bufsize;
+       if (!destructive)
+               size += max_bufsize; /* in_buffer */
  
         /* mf */
         size += lcpit_matchfinder_get_needed_memory(max_bufsize);
@@ -2120,7 +2131,7 @@ lzms_get_needed_memory(size_t max_bufsize, unsigned compression_level)
  
  static int
  lzms_create_compressor(size_t max_bufsize, unsigned compression_level,
-                      void **c_ret)
+                      bool destructive, void **c_ret)
  {
         struct lzms_compressor *c;
         u32 nice_match_len;
@@ -2132,6 +2143,8 @@ lzms_create_compressor(size_t max_bufsize, unsigned compression_level,
         if (!c)
                 goto oom0;
  
+       c->destructive = destructive;
+
         /* Scale nice_match_len with the compression level.  But to allow an
          * optimization for length cost calculations, don't allow nice_match_len
          * to exceed MAX_FAST_LENGTH.  */
@@ -2142,9 +2155,11 @@ lzms_create_compressor(size_t max_bufsize, unsigned compression_level,
         c->try_lit_lzrep0 = (compression_level >= 60);
         c->try_lzrep_lit_lzrep0 = (compression_level >= 60);
  
-       c->in_buffer = MALLOC(max_bufsize);
-       if (!c->in_buffer)
-               goto oom1;
+       if (!c->destructive) {
+               c->in_buffer = MALLOC(max_bufsize);
+               if (!c->in_buffer)
+                       goto oom1;
+       }
  
         if (!lcpit_matchfinder_init(&c->mf, max_bufsize, 2, nice_match_len))
                 goto oom2;
@@ -2156,7 +2171,8 @@ lzms_create_compressor(size_t max_bufsize, unsigned compression_level,
         return 0;
  
  oom2:
-       FREE(c->in_buffer);
+       if (!c->destructive)
+               FREE(c->in_buffer);
  oom1:
         ALIGNED_FREE(c);
  oom0:
@@ -2168,13 +2184,17 @@ lzms_compress(const void *in, size_t in_nbytes,
               void *out, size_t out_nbytes_avail, void *_c)
  {
         struct lzms_compressor *c = _c;
+       size_t result;
  
         /* Don't bother trying to compress extremely small inputs.  */
         if (in_nbytes < 4)
                 return 0;
  
         /* Copy the input data into the internal buffer and preprocess it.  */
-       memcpy(c->in_buffer, in, in_nbytes);
+       if (c->destructive)
+               c->in_buffer = (void *)in;
+       else
+               memcpy(c->in_buffer, in, in_nbytes);
         c->in_nbytes = in_nbytes;
         lzms_x86_filter(c->in_buffer, in_nbytes, c->last_target_usages, false);
  
@@ -2187,13 +2207,16 @@ lzms_compress(const void *in, size_t in_nbytes,
         lzms_range_encoder_init(&c->rc, out, out_nbytes_avail / sizeof(le16));
         lzms_output_bitstream_init(&c->os, out, out_nbytes_avail / sizeof(le16));
         lzms_init_states_and_probabilities(c);
-       lzms_init_huffman_codes(c, lzms_get_num_offset_slots(in_nbytes));
+       lzms_init_huffman_codes(c, lzms_get_num_offset_slots(c->in_nbytes));
  
         /* The main loop: parse and encode.  */
         lzms_near_optimal_parse(c);
  
         /* Return the compressed data size or 0.  */
-       return lzms_finalize(c);
+       result = lzms_finalize(c);
+       if (!result && c->destructive)
+               lzms_x86_filter(c->in_buffer, c->in_nbytes, c->last_target_usages, true);
+       return result;
  }
  
  static void
@@ -2201,7 +2224,8 @@ lzms_free_compressor(void *_c)
  {
         struct lzms_compressor *c = _c;
  
-       FREE(c->in_buffer);
+       if (!c->destructive)
+               FREE(c->in_buffer);
         lcpit_matchfinder_destroy(&c->mf);
         ALIGNED_FREE(c);
  }