#define MIN_LEVEL_FOR_NEAR_OPTIMAL 60
/*
- * The maximum window order for the matchfinder. This must be the base 2
- * logarithm of the maximum buffer size.
+ * Matchfinder definitions. For XPRESS, only a 16-bit matchfinder is needed.
*/
-#define MATCHFINDER_MAX_WINDOW_ORDER 16
+#define mf_pos_t u16
+#define MF_SUFFIX
/*
* Note: although XPRESS can potentially use a sliding window, it isn't well
* optimizations.
*/
-#include <string.h>
-
#include "wimlib/bitops.h"
#include "wimlib/compress_common.h"
#include "wimlib/compressor_ops.h"
struct xpress_optimum_node *optimum_nodes,
size_t count, const u32 codewords[], const u8 lens[])
{
- struct xpress_optimum_node *cur_optimum_ptr = optimum_nodes;
- struct xpress_optimum_node *end_optimum_ptr = optimum_nodes + count;
+ struct xpress_optimum_node *cur_node = optimum_nodes;
+ struct xpress_optimum_node *end_node = optimum_nodes + count;
do {
- unsigned length = cur_optimum_ptr->item & OPTIMUM_LEN_MASK;
- unsigned offset = cur_optimum_ptr->item >> OPTIMUM_OFFSET_SHIFT;
+ unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+ unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
if (length == 1) {
/* Literal */
} else {
/* Match */
unsigned adjusted_len;
- unsigned offset_high_bit;
+ unsigned log2_offset;
unsigned len_hdr;
unsigned sym;
adjusted_len = length - XPRESS_MIN_MATCH_LEN;
- offset_high_bit = fls32(offset);
+ log2_offset = fls32(offset);
len_hdr = min(0xF, adjusted_len);
- sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+ sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
xpress_write_bits(os, codewords[sym], lens[sym]);
xpress_write_extra_length_bytes(os, adjusted_len);
- xpress_write_bits(os, offset - (1U << offset_high_bit),
- offset_high_bit);
+ xpress_write_bits(os, offset - (1U << log2_offset),
+ log2_offset);
}
- cur_optimum_ptr += length;
- } while (cur_optimum_ptr != end_optimum_ptr);
+ cur_node += length;
+ } while (cur_node != end_node);
}
#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
{
unsigned adjusted_len = length - XPRESS_MIN_MATCH_LEN;
unsigned len_hdr = min(adjusted_len, 0xF);
- unsigned offset_high_bit = fls32(offset);
- unsigned sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+ unsigned log2_offset = fls32(offset);
+ unsigned sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
c->freqs[sym]++;
return (struct xpress_item) {
.data = (u64)sym |
((u64)adjusted_len << 9) |
- ((u64)offset_high_bit << 25) |
- ((u64)(offset ^ (1U << offset_high_bit)) << 29),
+ ((u64)log2_offset << 25) |
+ ((u64)(offset ^ (1U << log2_offset)) << 29),
};
}
const u8 * const in_end = in_begin + in_nbytes;
struct xpress_item *next_chosen_item = c->chosen_items;
unsigned len_3_too_far;
+ u32 next_hashes[2] = {};
if (in_nbytes <= 8192)
len_3_too_far = 2048;
length = hc_matchfinder_longest_match(&c->hc_mf,
in_begin,
- in_next,
+ in_next - in_begin,
XPRESS_MIN_MATCH_LEN - 1,
in_end - in_next,
min(in_end - in_next, c->nice_match_length),
c->max_search_depth,
+ next_hashes,
&offset);
if (length >= XPRESS_MIN_MATCH_LEN &&
!(length == XPRESS_MIN_MATCH_LEN && offset >= len_3_too_far))
in_next += 1;
hc_matchfinder_skip_positions(&c->hc_mf,
in_begin,
- in_next,
- in_end,
- length - 1);
+ in_next - in_begin,
+ in_end - in_begin,
+ length - 1,
+ next_hashes);
in_next += length - 1;
} else {
/* No match found */
const u8 * const in_end = in_begin + in_nbytes;
struct xpress_item *next_chosen_item = c->chosen_items;
unsigned len_3_too_far;
+ u32 next_hashes[2] = {};
if (in_nbytes <= 8192)
len_3_too_far = 2048;
/* Find the longest match at the current position. */
cur_len = hc_matchfinder_longest_match(&c->hc_mf,
in_begin,
- in_next,
+ in_next - in_begin,
XPRESS_MIN_MATCH_LEN - 1,
in_end - in_next,
min(in_end - in_next, c->nice_match_length),
c->max_search_depth,
+ next_hashes,
&cur_offset);
in_next += 1;
hc_matchfinder_skip_positions(&c->hc_mf,
in_begin,
- in_next,
- in_end,
- cur_len - 1);
+ in_next - in_begin,
+ in_end - in_begin,
+ cur_len - 1,
+ next_hashes);
in_next += cur_len - 1;
continue;
}
*/
next_len = hc_matchfinder_longest_match(&c->hc_mf,
in_begin,
- in_next,
+ in_next - in_begin,
cur_len,
in_end - in_next,
min(in_end - in_next, c->nice_match_length),
c->max_search_depth / 2,
+ next_hashes,
&next_offset);
in_next += 1;
xpress_record_match(c, cur_len, cur_offset);
hc_matchfinder_skip_positions(&c->hc_mf,
in_begin,
- in_next,
- in_end,
- cur_len - 2);
+ in_next - in_begin,
+ in_end - in_begin,
+ cur_len - 2,
+ next_hashes);
in_next += cur_len - 2;
continue;
}
*/
static void
xpress_tally_item_list(struct xpress_compressor *c,
- struct xpress_optimum_node *end_optimum_ptr)
+ struct xpress_optimum_node *end_node)
{
- struct xpress_optimum_node *cur_optimum_ptr = c->optimum_nodes;
+ struct xpress_optimum_node *cur_node = c->optimum_nodes;
do {
- unsigned length = cur_optimum_ptr->item & OPTIMUM_LEN_MASK;
- unsigned offset = cur_optimum_ptr->item >> OPTIMUM_OFFSET_SHIFT;
+ unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+ unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
if (length == 1) {
/* Literal */
} else {
/* Match */
unsigned adjusted_len;
- unsigned offset_high_bit;
+ unsigned log2_offset;
unsigned len_hdr;
unsigned sym;
adjusted_len = length - XPRESS_MIN_MATCH_LEN;
- offset_high_bit = fls32(offset);
+ log2_offset = fls32(offset);
len_hdr = min(0xF, adjusted_len);
- sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+ sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
c->freqs[sym]++;
}
- cur_optimum_ptr += length;
- } while (cur_optimum_ptr != end_optimum_ptr);
+ cur_node += length;
+ } while (cur_node != end_node);
}
/*
xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
struct lz_match *end_cache_ptr)
{
- struct xpress_optimum_node *cur_optimum_ptr = c->optimum_nodes + in_nbytes;
+ struct xpress_optimum_node *cur_node = c->optimum_nodes + in_nbytes;
struct lz_match *cache_ptr = end_cache_ptr;
- cur_optimum_ptr->cost_to_end = 0;
+ cur_node->cost_to_end = 0;
do {
unsigned literal;
u32 best_item;
struct lz_match *match;
unsigned len;
- cur_optimum_ptr--;
+ cur_node--;
cache_ptr--;
literal = cache_ptr->offset;
/* Consider coding a literal. */
best_item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
best_cost_to_end = c->costs[literal] +
- (cur_optimum_ptr + 1)->cost_to_end;
+ (cur_node + 1)->cost_to_end;
num_matches = cache_ptr->length;
if (num_matches == 0) {
/* No matches; the only choice is the literal. */
- cur_optimum_ptr->cost_to_end = best_cost_to_end;
- cur_optimum_ptr->item = best_item;
+ cur_node->cost_to_end = best_cost_to_end;
+ cur_node->item = best_item;
continue;
}
/* All lengths are small. Optimize accordingly. */
do {
unsigned offset;
- unsigned offset_high_bit;
+ unsigned log2_offset;
u32 offset_cost;
offset = match->offset;
- offset_high_bit = fls32(offset);
- offset_cost = offset_high_bit;
+ log2_offset = fls32(offset);
+ offset_cost = log2_offset;
do {
unsigned len_hdr;
unsigned sym;
len_hdr = len - XPRESS_MIN_MATCH_LEN;
sym = XPRESS_NUM_CHARS +
- ((offset_high_bit << 4) | len_hdr);
+ ((log2_offset << 4) | len_hdr);
cost_to_end =
offset_cost + c->costs[sym] +
- (cur_optimum_ptr + len)->cost_to_end;
+ (cur_node + len)->cost_to_end;
if (cost_to_end < best_cost_to_end) {
best_cost_to_end = cost_to_end;
best_item =
/* Some lengths are big. */
do {
unsigned offset;
- unsigned offset_high_bit;
+ unsigned log2_offset;
u32 offset_cost;
offset = match->offset;
- offset_high_bit = fls32(offset);
- offset_cost = offset_high_bit;
+ log2_offset = fls32(offset);
+ offset_cost = log2_offset;
do {
unsigned adjusted_len;
unsigned len_hdr;
adjusted_len = len - XPRESS_MIN_MATCH_LEN;
len_hdr = min(adjusted_len, 0xF);
sym = XPRESS_NUM_CHARS +
- ((offset_high_bit << 4) | len_hdr);
+ ((log2_offset << 4) | len_hdr);
cost_to_end =
offset_cost + c->costs[sym] +
- (cur_optimum_ptr + len)->cost_to_end;
+ (cur_node + len)->cost_to_end;
if (adjusted_len >= 0xF) {
cost_to_end += 8;
if (adjusted_len - 0xF >= 0xFF)
} while (++match != cache_ptr);
}
cache_ptr -= num_matches;
- cur_optimum_ptr->cost_to_end = best_cost_to_end;
- cur_optimum_ptr->item = best_item;
- } while (cur_optimum_ptr != c->optimum_nodes);
+ cur_node->cost_to_end = best_cost_to_end;
+ cur_node->item = best_item;
+ } while (cur_node != c->optimum_nodes);
}
/*
{
const u8 * const in_begin = in;
const u8 *in_next = in_begin;
- const u8 * const in_end = in_begin + in_nbytes;
struct lz_match *cache_ptr = c->match_cache;
- u32 next_hash;
+ u32 next_hashes[2] = {};
+ u32 max_len = in_nbytes;
+ u32 nice_len = min(max_len, c->nice_match_length);
bt_matchfinder_init(&c->bt_mf);
- next_hash = bt_matchfinder_hash_3_bytes(in_next);
- do {
+ for (;;) {
struct lz_match *matches;
- unsigned best_len;
+ u32 best_len;
/* If we've found so many matches that the cache might overflow
* if we keep finding more, then stop finding matches. This
* case is very unlikely. */
- if (unlikely(cache_ptr >= c->cache_overflow_mark)) {
- do {
- cache_ptr->length = 0;
- cache_ptr->offset = *in_next++;
- cache_ptr++;
- } while (in_next != in_end);
- return cache_ptr;
- }
+ if (unlikely(cache_ptr >= c->cache_overflow_mark ||
+ max_len < BT_MATCHFINDER_REQUIRED_NBYTES))
+ break;
matches = cache_ptr;
cache_ptr =
bt_matchfinder_get_matches(&c->bt_mf,
in_begin,
- in_next,
- XPRESS_MIN_MATCH_LEN,
- in_end - in_next,
- min(in_end - in_next, c->nice_match_length),
+ in_next - in_begin,
+ max_len,
+ nice_len,
c->max_search_depth,
- &next_hash,
+ next_hashes,
&best_len,
cache_ptr);
cache_ptr->length = cache_ptr - matches;
- cache_ptr->offset = *in_next;
- in_next++;
+ cache_ptr->offset = *in_next++;
cache_ptr++;
+ max_len--;
+ nice_len = min(nice_len, max_len);
/*
* If there was a very long match found, then don't cache any
* very much. If there's a long match, then the data must be
* highly compressible, so it doesn't matter as much what we do.
*/
- if (best_len >= c->nice_match_length) {
+ if (best_len >= nice_len) {
+ if (unlikely(best_len +
+ BT_MATCHFINDER_REQUIRED_NBYTES >= max_len))
+ break;
--best_len;
do {
bt_matchfinder_skip_position(&c->bt_mf,
in_begin,
- in_next,
- in_end,
- min(in_end - in_next,
- c->nice_match_length),
+ in_next - in_begin,
+ max_len,
+ nice_len,
c->max_search_depth,
- &next_hash);
-
+ next_hashes);
cache_ptr->length = 0;
cache_ptr->offset = *in_next++;
cache_ptr++;
+ max_len--;
+ nice_len = min(nice_len, max_len);
} while (--best_len);
}
- } while (in_next != in_end);
+ }
+
+ while (max_len--) {
+ cache_ptr->length = 0;
+ cache_ptr->offset = *in_next++;
+ cache_ptr++;
+ }
return cache_ptr;
}
if (max_bufsize > XPRESS_MAX_BUFSIZE)
return WIMLIB_ERR_INVALID_PARAM;
- c = ALIGNED_MALLOC(xpress_get_compressor_size(max_bufsize, compression_level),
- MATCHFINDER_ALIGNMENT);
+ c = MALLOC(xpress_get_compressor_size(max_bufsize, compression_level));
if (!c)
goto oom0;
if (compression_level < 30) {
c->impl = xpress_compress_greedy;
- c->max_search_depth = (compression_level * 24) / 16;
- c->nice_match_length = (compression_level * 48) / 16;
+ c->max_search_depth = (compression_level * 30) / 16;
+ c->nice_match_length = (compression_level * 60) / 16;
} else {
c->impl = xpress_compress_lazy;
- c->max_search_depth = (compression_level * 24) / 32;
- c->nice_match_length = (compression_level * 48) / 32;
+ c->max_search_depth = (compression_level * 30) / 32;
+ c->nice_match_length = (compression_level * 60) / 32;
/* xpress_compress_lazy() needs max_search_depth >= 2
* because it halves the max_search_depth when
&c->match_cache[max_bufsize * CACHE_RESERVE_PER_POS];
c->impl = xpress_compress_near_optimal;
- c->max_search_depth = (compression_level * 32) / 100;
- c->nice_match_length = (compression_level * 50) / 100;
+ c->max_search_depth = (compression_level * 28) / 100;
+ c->nice_match_length = (compression_level * 56) / 100;
c->num_optim_passes = compression_level / 40;
}
#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
return 0;
oom1:
- ALIGNED_FREE(c);
+ FREE(c);
oom0:
return WIMLIB_ERR_NOMEM;
}
} else
#endif
FREE(c->chosen_items);
- ALIGNED_FREE(c);
+ FREE(c);
}
const struct compressor_ops xpress_compressor_ops = {