static inline void
lzms_update_state(u8 *state_p, int bit, unsigned num_states)
{
- *state_p = ((*state_p << 1) | bit) % num_states;
+ *state_p = ((*state_p << 1) | bit) & (num_states - 1);
}
static inline void
* delta context with the specified @span.
*/
static inline u32
-lzms_delta_hash(const u8 *p, u32 span)
+lzms_delta_hash(const u8 *p, const u32 pos, u32 span)
{
/* A delta match has a certain span and an offset that is a multiple of
* that span. To reduce wasted space we use a single combined hash
u8 d0 = *(p + 0) - *(p + 0 - span);
u8 d1 = *(p + 1) - *(p + 1 - span);
u8 d2 = *(p + 2) - *(p + 2 - span);
- u32 v = ((span + ((u32)(uintptr_t)p & (span - 1))) << 24) |
+ u32 v = ((span + (pos & (span - 1))) << 24) |
((u32)d2 << 16) | ((u32)d1 << 8) | d0;
return lz_hash(v, DELTA_HASH_ORDER);
}
const u32 span = (u32)1 << power;
if (unlikely(pos < span))
continue;
- const u32 next_hash = lzms_delta_hash(in_next + 1, span);
+ const u32 next_hash = lzms_delta_hash(in_next + 1, pos + 1, span);
const u32 hash = c->next_delta_hashes[power];
c->delta_hash_table[hash] =
(power << DELTA_SOURCE_POWER_SHIFT) | pos;
* can be reached using a match or literal from the current position. This is
* essentially Dijkstra's algorithm in disguise: the graph nodes are positions,
* the graph edges are possible matches/literals to code, and the cost of each
- * edge is the estimated number of bits that will be required to output the
- * corresponding match or literal. But one difference is that we actually
- * compute the lowest-cost path in pieces, where each piece is terminated when
- * there are no choices to be made.
+ * edge is the estimated number of bits (scaled up by COST_SHIFT) that will be
+ * required to output the corresponding match or literal. But one difference is
+ * that we actually compute the lowest-cost path in pieces, where each piece is
+ * terminated when there are no choices to be made.
*
* The costs of literals and matches are estimated using the range encoder
* states and the semi-adaptive Huffman codes. Except for range encoding
if (unlikely(pos < span))
continue;
- const u32 next_hash = lzms_delta_hash(in_next + 1, span);
+ const u32 next_hash = lzms_delta_hash(in_next + 1, pos + 1, span);
const u32 hash = c->next_delta_hashes[power];
const u32 cur_match = c->delta_hash_table[hash];
/* Extend the delta match to its full length. */
const u32 len = lzms_extend_delta_match(in_next,
matchptr,
- 3,
+ NBYTES_HASHED_FOR_DELTA,
in_end - in_next,
span);
* Finalize the adaptive state that results from taking this
* lowest-cost path. */
struct lzms_item item_to_take = cur_node->item;
- struct lzms_optimum_node *source_node = cur_node - (item_to_take.length);
+ struct lzms_optimum_node *source_node = cur_node - item_to_take.length;
int next_item_idx = -1;
for (unsigned i = 0; i < cur_node->num_extra_items; i++) {
item_to_take = cur_node->extra_items[i];
if (source >= LZMS_NUM_DELTA_REPS) {
/* Explicit offset delta match */
- u32 pair = source - (LZMS_NUM_DELTA_REPS - 1);
lzms_update_delta_state(&cur_node->state, 0);
- cur_node->state.upcoming_delta_pair = pair;
+ cur_node->state.upcoming_delta_pair =
+ source - (LZMS_NUM_DELTA_REPS - 1);
} else {
/* Repeat offset delta match */
int rep_idx = source;