#define MIN_LEVEL_FOR_NEAR_OPTIMAL 60
/*
- * The maximum window order for the matchfinder. This must be the base 2
- * logarithm of the maximum buffer size.
+ * Matchfinder definitions. For XPRESS, only a 16-bit matchfinder is needed.
*/
-#define MATCHFINDER_MAX_WINDOW_ORDER 16
+#define mf_pos_t u16
+#define MF_SUFFIX
/*
* Note: although XPRESS can potentially use a sliding window, it isn't well
* optimizations.
*/
-#include <string.h>
-
#include "wimlib/bitops.h"
#include "wimlib/compress_common.h"
#include "wimlib/compressor_ops.h"
struct xpress_optimum_node *optimum_nodes,
size_t count, const u32 codewords[], const u8 lens[])
{
- struct xpress_optimum_node *cur_optimum_ptr = optimum_nodes;
- struct xpress_optimum_node *end_optimum_ptr = optimum_nodes + count;
+ struct xpress_optimum_node *cur_node = optimum_nodes;
+ struct xpress_optimum_node *end_node = optimum_nodes + count;
do {
- unsigned length = cur_optimum_ptr->item & OPTIMUM_LEN_MASK;
- unsigned offset = cur_optimum_ptr->item >> OPTIMUM_OFFSET_SHIFT;
+ unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+ unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
if (length == 1) {
/* Literal */
} else {
/* Match */
unsigned adjusted_len;
- unsigned offset_high_bit;
+ unsigned log2_offset;
unsigned len_hdr;
unsigned sym;
adjusted_len = length - XPRESS_MIN_MATCH_LEN;
- offset_high_bit = fls32(offset);
+ log2_offset = fls32(offset);
len_hdr = min(0xF, adjusted_len);
- sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+ sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
xpress_write_bits(os, codewords[sym], lens[sym]);
xpress_write_extra_length_bytes(os, adjusted_len);
- xpress_write_bits(os, offset - (1U << offset_high_bit),
- offset_high_bit);
+ xpress_write_bits(os, offset - (1U << log2_offset),
+ log2_offset);
}
- cur_optimum_ptr += length;
- } while (cur_optimum_ptr != end_optimum_ptr);
+ cur_node += length;
+ } while (cur_node != end_node);
}
#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
{
unsigned adjusted_len = length - XPRESS_MIN_MATCH_LEN;
unsigned len_hdr = min(adjusted_len, 0xF);
- unsigned offset_high_bit = fls32(offset);
- unsigned sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+ unsigned log2_offset = fls32(offset);
+ unsigned sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
c->freqs[sym]++;
return (struct xpress_item) {
.data = (u64)sym |
((u64)adjusted_len << 9) |
- ((u64)offset_high_bit << 25) |
- ((u64)(offset ^ (1U << offset_high_bit)) << 29),
+ ((u64)log2_offset << 25) |
+ ((u64)(offset ^ (1U << log2_offset)) << 29),
};
}
*/
static void
xpress_tally_item_list(struct xpress_compressor *c,
- struct xpress_optimum_node *end_optimum_ptr)
+ struct xpress_optimum_node *end_node)
{
- struct xpress_optimum_node *cur_optimum_ptr = c->optimum_nodes;
+ struct xpress_optimum_node *cur_node = c->optimum_nodes;
do {
- unsigned length = cur_optimum_ptr->item & OPTIMUM_LEN_MASK;
- unsigned offset = cur_optimum_ptr->item >> OPTIMUM_OFFSET_SHIFT;
+ unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+ unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
if (length == 1) {
/* Literal */
} else {
/* Match */
unsigned adjusted_len;
- unsigned offset_high_bit;
+ unsigned log2_offset;
unsigned len_hdr;
unsigned sym;
adjusted_len = length - XPRESS_MIN_MATCH_LEN;
- offset_high_bit = fls32(offset);
+ log2_offset = fls32(offset);
len_hdr = min(0xF, adjusted_len);
- sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+ sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
c->freqs[sym]++;
}
- cur_optimum_ptr += length;
- } while (cur_optimum_ptr != end_optimum_ptr);
+ cur_node += length;
+ } while (cur_node != end_node);
}
/*
xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
struct lz_match *end_cache_ptr)
{
- struct xpress_optimum_node *cur_optimum_ptr = c->optimum_nodes + in_nbytes;
+ struct xpress_optimum_node *cur_node = c->optimum_nodes + in_nbytes;
struct lz_match *cache_ptr = end_cache_ptr;
- cur_optimum_ptr->cost_to_end = 0;
+ cur_node->cost_to_end = 0;
do {
unsigned literal;
u32 best_item;
struct lz_match *match;
unsigned len;
- cur_optimum_ptr--;
+ cur_node--;
cache_ptr--;
literal = cache_ptr->offset;
/* Consider coding a literal. */
best_item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
best_cost_to_end = c->costs[literal] +
- (cur_optimum_ptr + 1)->cost_to_end;
+ (cur_node + 1)->cost_to_end;
num_matches = cache_ptr->length;
if (num_matches == 0) {
/* No matches; the only choice is the literal. */
- cur_optimum_ptr->cost_to_end = best_cost_to_end;
- cur_optimum_ptr->item = best_item;
+ cur_node->cost_to_end = best_cost_to_end;
+ cur_node->item = best_item;
continue;
}
/* All lengths are small. Optimize accordingly. */
do {
unsigned offset;
- unsigned offset_high_bit;
+ unsigned log2_offset;
u32 offset_cost;
offset = match->offset;
- offset_high_bit = fls32(offset);
- offset_cost = offset_high_bit;
+ log2_offset = fls32(offset);
+ offset_cost = log2_offset;
do {
unsigned len_hdr;
unsigned sym;
len_hdr = len - XPRESS_MIN_MATCH_LEN;
sym = XPRESS_NUM_CHARS +
- ((offset_high_bit << 4) | len_hdr);
+ ((log2_offset << 4) | len_hdr);
cost_to_end =
offset_cost + c->costs[sym] +
- (cur_optimum_ptr + len)->cost_to_end;
+ (cur_node + len)->cost_to_end;
if (cost_to_end < best_cost_to_end) {
best_cost_to_end = cost_to_end;
best_item =
/* Some lengths are big. */
do {
unsigned offset;
- unsigned offset_high_bit;
+ unsigned log2_offset;
u32 offset_cost;
offset = match->offset;
- offset_high_bit = fls32(offset);
- offset_cost = offset_high_bit;
+ log2_offset = fls32(offset);
+ offset_cost = log2_offset;
do {
unsigned adjusted_len;
unsigned len_hdr;
adjusted_len = len - XPRESS_MIN_MATCH_LEN;
len_hdr = min(adjusted_len, 0xF);
sym = XPRESS_NUM_CHARS +
- ((offset_high_bit << 4) | len_hdr);
+ ((log2_offset << 4) | len_hdr);
cost_to_end =
offset_cost + c->costs[sym] +
- (cur_optimum_ptr + len)->cost_to_end;
+ (cur_node + len)->cost_to_end;
if (adjusted_len >= 0xF) {
cost_to_end += 8;
if (adjusted_len - 0xF >= 0xFF)
} while (++match != cache_ptr);
}
cache_ptr -= num_matches;
- cur_optimum_ptr->cost_to_end = best_cost_to_end;
- cur_optimum_ptr->item = best_item;
- } while (cur_optimum_ptr != c->optimum_nodes);
+ cur_node->cost_to_end = best_cost_to_end;
+ cur_node->item = best_item;
+ } while (cur_node != c->optimum_nodes);
}
/*
{
const u8 * const in_begin = in;
const u8 *in_next = in_begin;
- const u8 * const in_end = in_begin + in_nbytes;
struct lz_match *cache_ptr = c->match_cache;
- u32 next_hash;
+ u32 next_hashes[2] = {};
+ u32 max_len = in_nbytes;
+ u32 nice_len = min(max_len, c->nice_match_length);
bt_matchfinder_init(&c->bt_mf);
- next_hash = bt_matchfinder_hash_3_bytes(in_next);
- do {
+ for (;;) {
struct lz_match *matches;
- unsigned best_len;
+ u32 best_len;
/* If we've found so many matches that the cache might overflow
* if we keep finding more, then stop finding matches. This
* case is very unlikely. */
- if (unlikely(cache_ptr >= c->cache_overflow_mark)) {
- do {
- cache_ptr->length = 0;
- cache_ptr->offset = *in_next++;
- cache_ptr++;
- } while (in_next != in_end);
- return cache_ptr;
- }
+ if (unlikely(cache_ptr >= c->cache_overflow_mark || max_len < 5))
+ break;
matches = cache_ptr;
cache_ptr =
bt_matchfinder_get_matches(&c->bt_mf,
in_begin,
- in_next,
- XPRESS_MIN_MATCH_LEN,
- in_end - in_next,
- min(in_end - in_next, c->nice_match_length),
+ in_next - in_begin,
+ max_len,
+ nice_len,
c->max_search_depth,
- &next_hash,
+ next_hashes,
&best_len,
cache_ptr);
cache_ptr->length = cache_ptr - matches;
- cache_ptr->offset = *in_next;
- in_next++;
+ cache_ptr->offset = *in_next++;
cache_ptr++;
+ max_len--;
+ nice_len = min(nice_len, max_len);
/*
* If there was a very long match found, then don't cache any
* very much. If there's a long match, then the data must be
* highly compressible, so it doesn't matter as much what we do.
*/
- if (best_len >= c->nice_match_length) {
+ if (best_len >= nice_len) {
+ if (unlikely(best_len + 5 >= max_len))
+ break;
--best_len;
do {
bt_matchfinder_skip_position(&c->bt_mf,
in_begin,
- in_next,
- in_end,
- min(in_end - in_next,
- c->nice_match_length),
+ in_next - in_begin,
+ max_len,
+ nice_len,
c->max_search_depth,
- &next_hash);
-
+ next_hashes);
cache_ptr->length = 0;
cache_ptr->offset = *in_next++;
cache_ptr++;
+ max_len--;
+ nice_len = min(nice_len, max_len);
} while (--best_len);
}
- } while (in_next != in_end);
+ }
+
+ while (max_len--) {
+ cache_ptr->length = 0;
+ cache_ptr->offset = *in_next++;
+ cache_ptr++;
+ }
return cache_ptr;
}
if (compression_level < 30) {
c->impl = xpress_compress_greedy;
- c->max_search_depth = (compression_level * 24) / 16;
- c->nice_match_length = (compression_level * 48) / 16;
+ c->max_search_depth = (compression_level * 30) / 16;
+ c->nice_match_length = (compression_level * 60) / 16;
} else {
c->impl = xpress_compress_lazy;
- c->max_search_depth = (compression_level * 24) / 32;
- c->nice_match_length = (compression_level * 48) / 32;
+ c->max_search_depth = (compression_level * 30) / 32;
+ c->nice_match_length = (compression_level * 60) / 32;
/* xpress_compress_lazy() needs max_search_depth >= 2
* because it halves the max_search_depth when
&c->match_cache[max_bufsize * CACHE_RESERVE_PER_POS];
c->impl = xpress_compress_near_optimal;
- c->max_search_depth = (compression_level * 32) / 100;
- c->nice_match_length = (compression_level * 50) / 100;
+ c->max_search_depth = (compression_level * 28) / 100;
+ c->nice_match_length = (compression_level * 56) / 100;
c->num_optim_passes = compression_level / 40;
}
#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */