*/
bool use_delta_matches;
+ /* If true, the compressor need not preserve the input buffer if it
+ * compresses the data successfully. */
+ bool destructive;
+
/* 'last_target_usages' is a large array that is only needed for
* preprocessing, so it is in union with fields that don't need to be
* initialized until after preprocessing. */
u32 next_delta_hashes[NUM_POWERS_TO_CONSIDER];
/* The per-byte graph nodes for near-optimal parsing */
- struct lzms_optimum_node optimum_nodes[NUM_OPTIM_NODES + MAX_FAST_LENGTH];
+ struct lzms_optimum_node optimum_nodes[NUM_OPTIM_NODES + MAX_FAST_LENGTH +
+ 1 + MAX_FAST_LENGTH];
/* Table: length => current cost for small match lengths */
u32 fast_length_cost_tab[MAX_FAST_LENGTH + 1];
}
/******************************************************************************
- * Cost evalution *
+ * Cost evaluation *
******************************************************************************/
/*
static inline void
lzms_update_state(u8 *state_p, int bit, unsigned num_states)
{
- *state_p = ((*state_p << 1) | bit) % num_states;
+ *state_p = ((*state_p << 1) | bit) & (num_states - 1);
}
static inline void
* delta context with the specified @span.
*/
static inline u32
-lzms_delta_hash(const u8 *p, u32 span)
+lzms_delta_hash(const u8 *p, const u32 pos, u32 span)
{
/* A delta match has a certain span and an offset that is a multiple of
* that span. To reduce wasted space we use a single combined hash
u8 d0 = *(p + 0) - *(p + 0 - span);
u8 d1 = *(p + 1) - *(p + 1 - span);
u8 d2 = *(p + 2) - *(p + 2 - span);
- u32 v = ((span + ((u32)(uintptr_t)p & (span - 1))) << 24) |
+ u32 v = ((span + (pos & (span - 1))) << 24) |
((u32)d2 << 16) | ((u32)d1 << 8) | d0;
return lz_hash(v, DELTA_HASH_ORDER);
}
const u32 span = (u32)1 << power;
if (unlikely(pos < span))
continue;
- const u32 next_hash = lzms_delta_hash(in_next + 1, span);
+ const u32 next_hash = lzms_delta_hash(in_next + 1, pos + 1, span);
const u32 hash = c->next_delta_hashes[power];
c->delta_hash_table[hash] =
(power << DELTA_SOURCE_POWER_SHIFT) | pos;
* can be reached using a match or literal from the current position. This is
* essentially Dijkstra's algorithm in disguise: the graph nodes are positions,
* the graph edges are possible matches/literals to code, and the cost of each
- * edge is the estimated number of bits that will be required to output the
- * corresponding match or literal. But one difference is that we actually
- * compute the lowest-cost path in pieces, where each piece is terminated when
- * there are no choices to be made.
+ * edge is the estimated number of bits (scaled up by COST_SHIFT) that will be
+ * required to output the corresponding match or literal. But one difference is
+ * that we actually compute the lowest-cost path in pieces, where each piece is
+ * terminated when there are no choices to be made.
*
* The costs of literals and matches are estimated using the range encoder
* states and the semi-adaptive Huffman codes. Except for range encoding
if (unlikely(pos < span))
continue;
- const u32 next_hash = lzms_delta_hash(in_next + 1, span);
+ const u32 next_hash = lzms_delta_hash(in_next + 1, pos + 1, span);
const u32 hash = c->next_delta_hashes[power];
const u32 cur_match = c->delta_hash_table[hash];
/* Extend the delta match to its full length. */
const u32 len = lzms_extend_delta_match(in_next,
matchptr,
- 3,
+ NBYTES_HASHED_FOR_DELTA,
in_end - in_next,
span);
const u32 raw_offset = offset >> power;
+
+ if (unlikely(raw_offset > DELTA_SOURCE_RAW_OFFSET_MASK -
+ (LZMS_NUM_DELTA_REPS - 1)))
+ continue;
+
const u32 pair = (power << DELTA_SOURCE_POWER_SHIFT) |
raw_offset;
const u32 source = DELTA_SOURCE_TAG |
* Finalize the adaptive state that results from taking this
* lowest-cost path. */
struct lzms_item item_to_take = cur_node->item;
- struct lzms_optimum_node *source_node = cur_node - (item_to_take.length);
+ struct lzms_optimum_node *source_node = cur_node - item_to_take.length;
int next_item_idx = -1;
for (unsigned i = 0; i < cur_node->num_extra_items; i++) {
item_to_take = cur_node->extra_items[i];
if (source >= LZMS_NUM_DELTA_REPS) {
/* Explicit offset delta match */
- u32 pair = source - (LZMS_NUM_DELTA_REPS - 1);
lzms_update_delta_state(&cur_node->state, 0);
- cur_node->state.upcoming_delta_pair = pair;
+ cur_node->state.upcoming_delta_pair =
+ source - (LZMS_NUM_DELTA_REPS - 1);
} else {
/* Repeat offset delta match */
int rep_idx = source;
}
static u64
-lzms_get_needed_memory(size_t max_bufsize, unsigned compression_level)
+lzms_get_needed_memory(size_t max_bufsize, unsigned compression_level,
+ bool destructive)
{
u64 size = 0;
size += sizeof(struct lzms_compressor);
- /* in_buffer */
- size += max_bufsize;
+ if (!destructive)
+ size += max_bufsize; /* in_buffer */
/* mf */
size += lcpit_matchfinder_get_needed_memory(max_bufsize);
static int
lzms_create_compressor(size_t max_bufsize, unsigned compression_level,
- void **c_ret)
+ bool destructive, void **c_ret)
{
struct lzms_compressor *c;
u32 nice_match_len;
if (!c)
goto oom0;
+ c->destructive = destructive;
+
/* Scale nice_match_len with the compression level. But to allow an
* optimization for length cost calculations, don't allow nice_match_len
* to exceed MAX_FAST_LENGTH. */
c->try_lit_lzrep0 = (compression_level >= 60);
c->try_lzrep_lit_lzrep0 = (compression_level >= 60);
- c->in_buffer = MALLOC(max_bufsize);
- if (!c->in_buffer)
- goto oom1;
+ if (!c->destructive) {
+ c->in_buffer = MALLOC(max_bufsize);
+ if (!c->in_buffer)
+ goto oom1;
+ }
if (!lcpit_matchfinder_init(&c->mf, max_bufsize, 2, nice_match_len))
goto oom2;
return 0;
oom2:
- FREE(c->in_buffer);
+ if (!c->destructive)
+ FREE(c->in_buffer);
oom1:
ALIGNED_FREE(c);
oom0:
void *out, size_t out_nbytes_avail, void *_c)
{
struct lzms_compressor *c = _c;
+ size_t result;
/* Don't bother trying to compress extremely small inputs. */
if (in_nbytes < 4)
return 0;
/* Copy the input data into the internal buffer and preprocess it. */
- memcpy(c->in_buffer, in, in_nbytes);
+ if (c->destructive)
+ c->in_buffer = (void *)in;
+ else
+ memcpy(c->in_buffer, in, in_nbytes);
c->in_nbytes = in_nbytes;
lzms_x86_filter(c->in_buffer, in_nbytes, c->last_target_usages, false);
lzms_range_encoder_init(&c->rc, out, out_nbytes_avail / sizeof(le16));
lzms_output_bitstream_init(&c->os, out, out_nbytes_avail / sizeof(le16));
lzms_init_states_and_probabilities(c);
- lzms_init_huffman_codes(c, lzms_get_num_offset_slots(in_nbytes));
+ lzms_init_huffman_codes(c, lzms_get_num_offset_slots(c->in_nbytes));
/* The main loop: parse and encode. */
lzms_near_optimal_parse(c);
/* Return the compressed data size or 0. */
- return lzms_finalize(c);
+ result = lzms_finalize(c);
+ if (!result && c->destructive)
+ lzms_x86_filter(c->in_buffer, c->in_nbytes, c->last_target_usages, true);
+ return result;
}
static void
{
struct lzms_compressor *c = _c;
- FREE(c->in_buffer);
+ if (!c->destructive)
+ FREE(c->in_buffer);
lcpit_matchfinder_destroy(&c->mf);
ALIGNED_FREE(c);
}