]> wimlib.net Git - wimlib/blobdiff - src/lzms_compress.c
win32_apply: Stricter checks of resource format for WOF external backing
[wimlib] / src / lzms_compress.c
index caa93130c75ca44f656a1d8464621985d6eb3bd1..3a4647c44f87bd77140c496eb3ae5d351039a9ca 100644 (file)
@@ -307,7 +307,8 @@ struct lzms_compressor {
        u32 next_delta_hashes[NUM_POWERS_TO_CONSIDER];
 
        /* The per-byte graph nodes for near-optimal parsing  */
-       struct lzms_optimum_node optimum_nodes[NUM_OPTIM_NODES + MAX_FAST_LENGTH];
+       struct lzms_optimum_node optimum_nodes[NUM_OPTIM_NODES + MAX_FAST_LENGTH +
+                                              1 + MAX_FAST_LENGTH];
 
        /* Table: length => current cost for small match lengths  */
        u32 fast_length_cost_tab[MAX_FAST_LENGTH + 1];
@@ -946,7 +947,7 @@ lzms_encode_item_list(struct lzms_compressor *c,
 }
 
 /******************************************************************************
- *                             Cost evalution                                 *
+ *                             Cost evaluation                                *
  ******************************************************************************/
 
 /*
@@ -1126,7 +1127,7 @@ lzms_update_lru_queues(struct lzms_adaptive_state *state)
 static inline void
 lzms_update_state(u8 *state_p, int bit, unsigned num_states)
 {
-       *state_p = ((*state_p << 1) | bit) % num_states;
+       *state_p = ((*state_p << 1) | bit) & (num_states - 1);
 }
 
 static inline void
@@ -1201,7 +1202,7 @@ lzms_init_delta_matchfinder(struct lzms_compressor *c)
  * delta context with the specified @span.
  */
 static inline u32
-lzms_delta_hash(const u8 *p, u32 span)
+lzms_delta_hash(const u8 *p, const u32 pos, u32 span)
 {
        /* A delta match has a certain span and an offset that is a multiple of
         * that span.  To reduce wasted space we use a single combined hash
@@ -1213,7 +1214,7 @@ lzms_delta_hash(const u8 *p, u32 span)
        u8 d0 = *(p + 0) - *(p + 0 - span);
        u8 d1 = *(p + 1) - *(p + 1 - span);
        u8 d2 = *(p + 2) - *(p + 2 - span);
-       u32 v = ((span + ((u32)(uintptr_t)p & (span - 1))) << 24) |
+       u32 v = ((span + (pos & (span - 1))) << 24) |
                ((u32)d2 << 16) | ((u32)d1 << 8) | d0;
        return lz_hash(v, DELTA_HASH_ORDER);
 }
@@ -1249,7 +1250,7 @@ lzms_delta_matchfinder_skip_bytes(struct lzms_compressor *c,
                        const u32 span = (u32)1 << power;
                        if (unlikely(pos < span))
                                continue;
-                       const u32 next_hash = lzms_delta_hash(in_next + 1, span);
+                       const u32 next_hash = lzms_delta_hash(in_next + 1, pos + 1, span);
                        const u32 hash = c->next_delta_hashes[power];
                        c->delta_hash_table[hash] =
                                (power << DELTA_SOURCE_POWER_SHIFT) | pos;
@@ -1286,10 +1287,10 @@ lzms_skip_bytes(struct lzms_compressor *c, u32 count, const u8 *in_next)
  * can be reached using a match or literal from the current position.  This is
  * essentially Dijkstra's algorithm in disguise: the graph nodes are positions,
  * the graph edges are possible matches/literals to code, and the cost of each
- * edge is the estimated number of bits that will be required to output the
- * corresponding match or literal.  But one difference is that we actually
- * compute the lowest-cost path in pieces, where each piece is terminated when
- * there are no choices to be made.
+ * edge is the estimated number of bits (scaled up by COST_SHIFT) that will be
+ * required to output the corresponding match or literal.  But one difference is
+ * that we actually compute the lowest-cost path in pieces, where each piece is
+ * terminated when there are no choices to be made.
  *
  * The costs of literals and matches are estimated using the range encoder
  * states and the semi-adaptive Huffman codes.  Except for range encoding
@@ -1726,7 +1727,7 @@ begin:
                                if (unlikely(pos < span))
                                        continue;
 
-                               const u32 next_hash = lzms_delta_hash(in_next + 1, span);
+                               const u32 next_hash = lzms_delta_hash(in_next + 1, pos + 1, span);
                                const u32 hash = c->next_delta_hashes[power];
                                const u32 cur_match = c->delta_hash_table[hash];
 
@@ -1759,11 +1760,16 @@ begin:
                                /* Extend the delta match to its full length.  */
                                const u32 len = lzms_extend_delta_match(in_next,
                                                                        matchptr,
-                                                                       3,
+                                                                       NBYTES_HASHED_FOR_DELTA,
                                                                        in_end - in_next,
                                                                        span);
 
                                const u32 raw_offset = offset >> power;
+
+                               if (unlikely(raw_offset > DELTA_SOURCE_RAW_OFFSET_MASK -
+                                                         (LZMS_NUM_DELTA_REPS - 1)))
+                                       continue;
+
                                const u32 pair = (power << DELTA_SOURCE_POWER_SHIFT) |
                                                 raw_offset;
                                const u32 source = DELTA_SOURCE_TAG |
@@ -1889,7 +1895,7 @@ begin:
                 * Finalize the adaptive state that results from taking this
                 * lowest-cost path.  */
                struct lzms_item item_to_take = cur_node->item;
-               struct lzms_optimum_node *source_node = cur_node - (item_to_take.length);
+               struct lzms_optimum_node *source_node = cur_node - item_to_take.length;
                int next_item_idx = -1;
                for (unsigned i = 0; i < cur_node->num_extra_items; i++) {
                        item_to_take = cur_node->extra_items[i];
@@ -1916,9 +1922,9 @@ begin:
 
                                        if (source >= LZMS_NUM_DELTA_REPS) {
                                                /* Explicit offset delta match  */
-                                               u32 pair = source - (LZMS_NUM_DELTA_REPS - 1);
                                                lzms_update_delta_state(&cur_node->state, 0);
-                                               cur_node->state.upcoming_delta_pair = pair;
+                                               cur_node->state.upcoming_delta_pair =
+                                                       source - (LZMS_NUM_DELTA_REPS - 1);
                                        } else {
                                                /* Repeat offset delta match  */
                                                int rep_idx = source;