#define MIN_LEVEL_FOR_NEAR_OPTIMAL 60
/*
- * The window order for the matchfinder. This must be the base 2 logarithm of
- * the maximum buffer size.
+ * Matchfinder definitions. For XPRESS, only a 16-bit matchfinder is needed.
*/
-#define MATCHFINDER_WINDOW_ORDER 16
+#define mf_pos_t u16
+#define MF_SUFFIX
/*
- * Although XPRESS can potentially use a sliding window, it isn't well suited
- * for large buffers of data because there is no way to reset the Huffman code.
- * Therefore, we only allow buffers in which there is no restriction on match
- * offsets (no sliding window). This simplifies the code and allows some
+ * Note: although XPRESS can potentially use a sliding window, it isn't well
+ * suited for large buffers of data because there is no way to reset the Huffman
+ * code. Therefore, we only allow buffers in which there is no restriction on
+ * match offsets (no sliding window). This simplifies the code and allows some
* optimizations.
*/
-#define MATCHFINDER_IS_SLIDING 0
-
-#include <string.h>
#include "wimlib/bitops.h"
#include "wimlib/compress_common.h"
union {
/* Data for greedy or lazy parsing */
struct {
- struct hc_matchfinder hc_mf;
struct xpress_item *chosen_items;
- u8 nonoptimal_end[0];
+ struct hc_matchfinder hc_mf;
+ /* hc_mf must be last! */
};
#if SUPPORT_NEAR_OPTIMAL_PARSING
/* Data for near-optimal parsing */
struct {
- struct bt_matchfinder bt_mf;
struct xpress_optimum_node *optimum_nodes;
struct lz_match *match_cache;
struct lz_match *cache_overflow_mark;
unsigned num_optim_passes;
u32 costs[XPRESS_NUM_SYMBOLS];
- u8 optimal_end[0];
+ struct bt_matchfinder bt_mf;
+ /* bt_mf must be last! */
};
#endif
};
/* Pointer to the start of the output buffer. */
u8 *start;
- /* Pointer to the location in the ouput buffer at which to write the
+ /* Pointer to the location in the output buffer at which to write the
* next 16 bits. */
u8 *next_bits;
struct xpress_optimum_node *optimum_nodes,
size_t count, const u32 codewords[], const u8 lens[])
{
- struct xpress_optimum_node *cur_optimum_ptr = optimum_nodes;
- struct xpress_optimum_node *end_optimum_ptr = optimum_nodes + count;
+ struct xpress_optimum_node *cur_node = optimum_nodes;
+ struct xpress_optimum_node *end_node = optimum_nodes + count;
do {
- unsigned length = cur_optimum_ptr->item & OPTIMUM_LEN_MASK;
- unsigned offset = cur_optimum_ptr->item >> OPTIMUM_OFFSET_SHIFT;
+ unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+ unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
if (length == 1) {
/* Literal */
} else {
/* Match */
unsigned adjusted_len;
- unsigned offset_high_bit;
+ unsigned log2_offset;
unsigned len_hdr;
unsigned sym;
adjusted_len = length - XPRESS_MIN_MATCH_LEN;
- offset_high_bit = fls32(offset);
+ log2_offset = fls32(offset);
len_hdr = min(0xF, adjusted_len);
- sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+ sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
xpress_write_bits(os, codewords[sym], lens[sym]);
xpress_write_extra_length_bytes(os, adjusted_len);
- xpress_write_bits(os, offset - (1U << offset_high_bit),
- offset_high_bit);
+ xpress_write_bits(os, offset - (1U << log2_offset),
+ log2_offset);
}
- cur_optimum_ptr += length;
- } while (cur_optimum_ptr != end_optimum_ptr);
+ cur_node += length;
+ } while (cur_node != end_node);
}
#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
{
unsigned adjusted_len = length - XPRESS_MIN_MATCH_LEN;
unsigned len_hdr = min(adjusted_len, 0xF);
- unsigned offset_high_bit = fls32(offset);
- unsigned sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+ unsigned log2_offset = fls32(offset);
+ unsigned sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
c->freqs[sym]++;
return (struct xpress_item) {
.data = (u64)sym |
((u64)adjusted_len << 9) |
- ((u64)offset_high_bit << 25) |
- ((u64)(offset ^ (1U << offset_high_bit)) << 29),
+ ((u64)log2_offset << 25) |
+ ((u64)(offset ^ (1U << log2_offset)) << 29),
};
}
const void * restrict in, size_t in_nbytes,
void * restrict out, size_t out_nbytes_avail)
{
- const u8 * const in_base = in;
- const u8 * in_next = in_base;
- const u8 * const in_end = in_base + in_nbytes;
+ const u8 * const in_begin = in;
+ const u8 * in_next = in_begin;
+ const u8 * const in_end = in_begin + in_nbytes;
struct xpress_item *next_chosen_item = c->chosen_items;
unsigned len_3_too_far;
+ u32 next_hashes[2] = {};
if (in_nbytes <= 8192)
len_3_too_far = 2048;
unsigned offset;
length = hc_matchfinder_longest_match(&c->hc_mf,
- in_base,
- in_next,
+ in_begin,
+ in_next - in_begin,
XPRESS_MIN_MATCH_LEN - 1,
in_end - in_next,
min(in_end - in_next, c->nice_match_length),
c->max_search_depth,
+ next_hashes,
&offset);
if (length >= XPRESS_MIN_MATCH_LEN &&
!(length == XPRESS_MIN_MATCH_LEN && offset >= len_3_too_far))
xpress_record_match(c, length, offset);
in_next += 1;
hc_matchfinder_skip_positions(&c->hc_mf,
- in_base,
- in_next,
- in_end,
- length - 1);
+ in_begin,
+ in_next - in_begin,
+ in_end - in_begin,
+ length - 1,
+ next_hashes);
in_next += length - 1;
} else {
/* No match found */
const void * restrict in, size_t in_nbytes,
void * restrict out, size_t out_nbytes_avail)
{
- const u8 * const in_base = in;
- const u8 * in_next = in_base;
- const u8 * const in_end = in_base + in_nbytes;
+ const u8 * const in_begin = in;
+ const u8 * in_next = in_begin;
+ const u8 * const in_end = in_begin + in_nbytes;
struct xpress_item *next_chosen_item = c->chosen_items;
unsigned len_3_too_far;
+ u32 next_hashes[2] = {};
if (in_nbytes <= 8192)
len_3_too_far = 2048;
/* Find the longest match at the current position. */
cur_len = hc_matchfinder_longest_match(&c->hc_mf,
- in_base,
- in_next,
+ in_begin,
+ in_next - in_begin,
XPRESS_MIN_MATCH_LEN - 1,
in_end - in_next,
min(in_end - in_next, c->nice_match_length),
c->max_search_depth,
+ next_hashes,
&cur_offset);
in_next += 1;
xpress_record_match(c, cur_len, cur_offset);
hc_matchfinder_skip_positions(&c->hc_mf,
- in_base,
- in_next,
- in_end,
- cur_len - 1);
+ in_begin,
+ in_next - in_begin,
+ in_end - in_begin,
+ cur_len - 1,
+ next_hashes);
in_next += cur_len - 1;
continue;
}
* longest_match() inlined at each.
*/
next_len = hc_matchfinder_longest_match(&c->hc_mf,
- in_base,
- in_next,
+ in_begin,
+ in_next - in_begin,
cur_len,
in_end - in_next,
min(in_end - in_next, c->nice_match_length),
c->max_search_depth / 2,
+ next_hashes,
&next_offset);
in_next += 1;
*next_chosen_item++ =
xpress_record_match(c, cur_len, cur_offset);
hc_matchfinder_skip_positions(&c->hc_mf,
- in_base,
- in_next,
- in_end,
- cur_len - 2);
+ in_begin,
+ in_next - in_begin,
+ in_end - in_begin,
+ cur_len - 2,
+ next_hashes);
in_next += cur_len - 2;
continue;
}
*/
static void
xpress_tally_item_list(struct xpress_compressor *c,
- struct xpress_optimum_node *end_optimum_ptr)
+ struct xpress_optimum_node *end_node)
{
- struct xpress_optimum_node *cur_optimum_ptr = c->optimum_nodes;
+ struct xpress_optimum_node *cur_node = c->optimum_nodes;
do {
- unsigned length = cur_optimum_ptr->item & OPTIMUM_LEN_MASK;
- unsigned offset = cur_optimum_ptr->item >> OPTIMUM_OFFSET_SHIFT;
+ unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+ unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
if (length == 1) {
/* Literal */
} else {
/* Match */
unsigned adjusted_len;
- unsigned offset_high_bit;
+ unsigned log2_offset;
unsigned len_hdr;
unsigned sym;
adjusted_len = length - XPRESS_MIN_MATCH_LEN;
- offset_high_bit = fls32(offset);
+ log2_offset = fls32(offset);
len_hdr = min(0xF, adjusted_len);
- sym = XPRESS_NUM_CHARS + ((offset_high_bit << 4) | len_hdr);
+ sym = XPRESS_NUM_CHARS + ((log2_offset << 4) | len_hdr);
c->freqs[sym]++;
}
- cur_optimum_ptr += length;
- } while (cur_optimum_ptr != end_optimum_ptr);
+ cur_node += length;
+ } while (cur_node != end_node);
}
/*
xpress_find_min_cost_path(struct xpress_compressor *c, size_t in_nbytes,
struct lz_match *end_cache_ptr)
{
- struct xpress_optimum_node *cur_optimum_ptr = c->optimum_nodes + in_nbytes;
+ struct xpress_optimum_node *cur_node = c->optimum_nodes + in_nbytes;
struct lz_match *cache_ptr = end_cache_ptr;
- cur_optimum_ptr->cost_to_end = 0;
+ cur_node->cost_to_end = 0;
do {
unsigned literal;
u32 best_item;
struct lz_match *match;
unsigned len;
- cur_optimum_ptr--;
+ cur_node--;
cache_ptr--;
literal = cache_ptr->offset;
/* Consider coding a literal. */
best_item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
best_cost_to_end = c->costs[literal] +
- (cur_optimum_ptr + 1)->cost_to_end;
+ (cur_node + 1)->cost_to_end;
num_matches = cache_ptr->length;
if (num_matches == 0) {
/* No matches; the only choice is the literal. */
- cur_optimum_ptr->cost_to_end = best_cost_to_end;
- cur_optimum_ptr->item = best_item;
+ cur_node->cost_to_end = best_cost_to_end;
+ cur_node->item = best_item;
continue;
}
/* All lengths are small. Optimize accordingly. */
do {
unsigned offset;
- unsigned offset_high_bit;
+ unsigned log2_offset;
u32 offset_cost;
offset = match->offset;
- offset_high_bit = fls32(offset);
- offset_cost = offset_high_bit;
+ log2_offset = fls32(offset);
+ offset_cost = log2_offset;
do {
unsigned len_hdr;
unsigned sym;
len_hdr = len - XPRESS_MIN_MATCH_LEN;
sym = XPRESS_NUM_CHARS +
- ((offset_high_bit << 4) | len_hdr);
+ ((log2_offset << 4) | len_hdr);
cost_to_end =
offset_cost + c->costs[sym] +
- (cur_optimum_ptr + len)->cost_to_end;
+ (cur_node + len)->cost_to_end;
if (cost_to_end < best_cost_to_end) {
best_cost_to_end = cost_to_end;
best_item =
/* Some lengths are big. */
do {
unsigned offset;
- unsigned offset_high_bit;
+ unsigned log2_offset;
u32 offset_cost;
offset = match->offset;
- offset_high_bit = fls32(offset);
- offset_cost = offset_high_bit;
+ log2_offset = fls32(offset);
+ offset_cost = log2_offset;
do {
unsigned adjusted_len;
unsigned len_hdr;
adjusted_len = len - XPRESS_MIN_MATCH_LEN;
len_hdr = min(adjusted_len, 0xF);
sym = XPRESS_NUM_CHARS +
- ((offset_high_bit << 4) | len_hdr);
+ ((log2_offset << 4) | len_hdr);
cost_to_end =
offset_cost + c->costs[sym] +
- (cur_optimum_ptr + len)->cost_to_end;
+ (cur_node + len)->cost_to_end;
if (adjusted_len >= 0xF) {
cost_to_end += 8;
if (adjusted_len - 0xF >= 0xFF)
} while (++match != cache_ptr);
}
cache_ptr -= num_matches;
- cur_optimum_ptr->cost_to_end = best_cost_to_end;
- cur_optimum_ptr->item = best_item;
- } while (cur_optimum_ptr != c->optimum_nodes);
+ cur_node->cost_to_end = best_cost_to_end;
+ cur_node->item = best_item;
+ } while (cur_node != c->optimum_nodes);
}
/*
xpress_find_matches(struct xpress_compressor * restrict c,
const void * restrict in, size_t in_nbytes)
{
- const u8 * const in_base = in;
- const u8 *in_next = in_base;
- const u8 * const in_end = in_base + in_nbytes;
+ const u8 * const in_begin = in;
+ const u8 *in_next = in_begin;
struct lz_match *cache_ptr = c->match_cache;
- unsigned long prev_hash = 0;
+ u32 next_hashes[2] = {};
+ u32 max_len = in_nbytes;
+ u32 nice_len = min(max_len, c->nice_match_length);
bt_matchfinder_init(&c->bt_mf);
- do {
- unsigned num_matches;
+ for (;;) {
+ struct lz_match *matches;
+ u32 best_len;
/* If we've found so many matches that the cache might overflow
* if we keep finding more, then stop finding matches. This
* case is very unlikely. */
- if (unlikely(cache_ptr >= c->cache_overflow_mark)) {
- do {
- cache_ptr->length = 0;
- cache_ptr->offset = *in_next++;
- cache_ptr++;
- } while (in_next != in_end);
- return cache_ptr;
- }
+ if (unlikely(cache_ptr >= c->cache_overflow_mark ||
+ max_len < BT_MATCHFINDER_REQUIRED_NBYTES))
+ break;
+
+ matches = cache_ptr;
/* Find matches with the current position using the binary tree
* matchfinder and save them in the next available slots in
* the match cache. */
- num_matches =
+ cache_ptr =
bt_matchfinder_get_matches(&c->bt_mf,
- in_base,
- in_next,
- XPRESS_MIN_MATCH_LEN,
- in_end - in_next,
- min(in_end - in_next, c->nice_match_length),
+ in_begin,
+ in_next - in_begin,
+ max_len,
+ nice_len,
c->max_search_depth,
- &prev_hash,
+ next_hashes,
+ &best_len,
cache_ptr);
- cache_ptr += num_matches;
- cache_ptr->length = num_matches;
- cache_ptr->offset = *in_next;
- in_next++;
+ cache_ptr->length = cache_ptr - matches;
+ cache_ptr->offset = *in_next++;
cache_ptr++;
+ max_len--;
+ nice_len = min(nice_len, max_len);
- if (num_matches) {
- /*
- * If there was a very long match found, then don't
- * cache any matches for the bytes covered by that
- * match. This avoids degenerate behavior when
- * compressing highly redundant data, where the number
- * of matches can be very large.
- *
- * This heuristic doesn't actually hurt the compression
- * ratio very much. If there's a long match, then the
- * data must be highly compressible, so it doesn't
- * matter as much what we do.
- */
- unsigned best_len = cache_ptr[-2].length;
- if (best_len >= c->nice_match_length) {
- --best_len;
- do {
- bt_matchfinder_skip_position(&c->bt_mf,
- in_base,
- in_next,
- in_end,
- min(in_end - in_next,
- c->nice_match_length),
- c->max_search_depth,
- &prev_hash);
-
- cache_ptr->length = 0;
- cache_ptr->offset = *in_next++;
- cache_ptr++;
- } while (--best_len);
- }
+ /*
+ * If there was a very long match found, then don't cache any
+ * matches for the bytes covered by that match. This avoids
+ * degenerate behavior when compressing highly redundant data,
+ * where the number of matches can be very large.
+ *
+ * This heuristic doesn't actually hurt the compression ratio
+ * very much. If there's a long match, then the data must be
+ * highly compressible, so it doesn't matter as much what we do.
+ */
+ if (best_len >= nice_len) {
+ if (unlikely(best_len +
+ BT_MATCHFINDER_REQUIRED_NBYTES >= max_len))
+ break;
+ --best_len;
+ do {
+ bt_matchfinder_skip_position(&c->bt_mf,
+ in_begin,
+ in_next - in_begin,
+ max_len,
+ nice_len,
+ c->max_search_depth,
+ next_hashes);
+ cache_ptr->length = 0;
+ cache_ptr->offset = *in_next++;
+ cache_ptr++;
+ max_len--;
+ nice_len = min(nice_len, max_len);
+ } while (--best_len);
}
- } while (in_next != in_end);
+ }
+
+ while (max_len--) {
+ cache_ptr->length = 0;
+ cache_ptr->offset = *in_next++;
+ cache_ptr++;
+ }
return cache_ptr;
}
#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
+static size_t
+xpress_get_compressor_size(size_t max_bufsize, unsigned compression_level)
+{
+#if SUPPORT_NEAR_OPTIMAL_PARSING
+ if (compression_level >= MIN_LEVEL_FOR_NEAR_OPTIMAL)
+ return offsetof(struct xpress_compressor, bt_mf) +
+ bt_matchfinder_size(max_bufsize);
+#endif
+
+ return offsetof(struct xpress_compressor, hc_mf) +
+ hc_matchfinder_size(max_bufsize);
+}
+
static u64
-xpress_get_needed_memory(size_t max_bufsize, unsigned compression_level)
+xpress_get_needed_memory(size_t max_bufsize, unsigned compression_level,
+ bool destructive)
{
- size_t size = 0;
+ u64 size = 0;
if (max_bufsize > XPRESS_MAX_BUFSIZE)
return 0;
+ size += xpress_get_compressor_size(max_bufsize, compression_level);
+
if (compression_level < MIN_LEVEL_FOR_NEAR_OPTIMAL ||
!SUPPORT_NEAR_OPTIMAL_PARSING) {
- size += offsetof(struct xpress_compressor, nonoptimal_end);
+ /* chosen_items */
size += max_bufsize * sizeof(struct xpress_item);
}
#if SUPPORT_NEAR_OPTIMAL_PARSING
else {
- size += offsetof(struct xpress_compressor, optimal_end);
+ /* optimum_nodes */
size += (max_bufsize + 1) * sizeof(struct xpress_optimum_node);
+ /* match_cache */
size += ((max_bufsize * CACHE_RESERVE_PER_POS) +
XPRESS_MAX_MATCH_LEN + max_bufsize) *
sizeof(struct lz_match);
static int
xpress_create_compressor(size_t max_bufsize, unsigned compression_level,
- void **c_ret)
+ bool destructive, void **c_ret)
{
struct xpress_compressor *c;
if (max_bufsize > XPRESS_MAX_BUFSIZE)
return WIMLIB_ERR_INVALID_PARAM;
- if (compression_level < 30) {
- c = ALIGNED_MALLOC(offsetof(struct xpress_compressor,
- nonoptimal_end),
- MATCHFINDER_ALIGNMENT);
- if (!c)
- return WIMLIB_ERR_NOMEM;
- c->impl = xpress_compress_greedy;
- c->max_search_depth = (compression_level * 24) / 16;
- c->nice_match_length = (compression_level * 48) / 16;
- c->chosen_items = MALLOC(max_bufsize * sizeof(struct xpress_item));
- if (!c->chosen_items) {
- ALIGNED_FREE(c);
- return WIMLIB_ERR_NOMEM;
- }
- } else if (compression_level < MIN_LEVEL_FOR_NEAR_OPTIMAL ||
- !SUPPORT_NEAR_OPTIMAL_PARSING)
+ c = MALLOC(xpress_get_compressor_size(max_bufsize, compression_level));
+ if (!c)
+ goto oom0;
+
+ if (compression_level < MIN_LEVEL_FOR_NEAR_OPTIMAL ||
+ !SUPPORT_NEAR_OPTIMAL_PARSING)
{
- c = ALIGNED_MALLOC(offsetof(struct xpress_compressor,
- nonoptimal_end),
- MATCHFINDER_ALIGNMENT);
- if (!c)
- return WIMLIB_ERR_NOMEM;
-
- c->impl = xpress_compress_lazy;
- c->max_search_depth = (compression_level * 24) / 32;
- c->nice_match_length = (compression_level * 48) / 32;
+
c->chosen_items = MALLOC(max_bufsize * sizeof(struct xpress_item));
- if (!c->chosen_items) {
- ALIGNED_FREE(c);
- return WIMLIB_ERR_NOMEM;
+ if (!c->chosen_items)
+ goto oom1;
+
+ if (compression_level < 30) {
+ c->impl = xpress_compress_greedy;
+ c->max_search_depth = (compression_level * 30) / 16;
+ c->nice_match_length = (compression_level * 60) / 16;
+ } else {
+ c->impl = xpress_compress_lazy;
+ c->max_search_depth = (compression_level * 30) / 32;
+ c->nice_match_length = (compression_level * 60) / 32;
+
+ /* xpress_compress_lazy() needs max_search_depth >= 2
+ * because it halves the max_search_depth when
+ * attempting a lazy match, and max_search_depth cannot
+ * be 0. */
+ if (c->max_search_depth < 2)
+ c->max_search_depth = 2;
}
}
#if SUPPORT_NEAR_OPTIMAL_PARSING
else {
- c = ALIGNED_MALLOC(offsetof(struct xpress_compressor,
- optimal_end),
- MATCHFINDER_ALIGNMENT);
- if (!c)
- return WIMLIB_ERR_NOMEM;
- c->impl = xpress_compress_near_optimal;
- c->max_search_depth = (compression_level * 32) / 100;
- c->nice_match_length = (compression_level * 50) / 100;
- c->num_optim_passes = compression_level / 40;
c->optimum_nodes = MALLOC((max_bufsize + 1) *
sizeof(struct xpress_optimum_node));
if (!c->optimum_nodes || !c->match_cache) {
FREE(c->optimum_nodes);
FREE(c->match_cache);
- ALIGNED_FREE(c);
- return WIMLIB_ERR_NOMEM;
+ goto oom1;
}
c->cache_overflow_mark =
&c->match_cache[max_bufsize * CACHE_RESERVE_PER_POS];
+
+ c->impl = xpress_compress_near_optimal;
+ c->max_search_depth = (compression_level * 28) / 100;
+ c->nice_match_length = (compression_level * 56) / 100;
+ c->num_optim_passes = compression_level / 40;
}
#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
+ /* max_search_depth == 0 is invalid. */
+ if (c->max_search_depth < 1)
+ c->max_search_depth = 1;
+
*c_ret = c;
return 0;
+
+oom1:
+ FREE(c);
+oom0:
+ return WIMLIB_ERR_NOMEM;
}
static size_t
-xpress_compress(const void *in, size_t in_nbytes,
- void *out, size_t out_nbytes_avail, void *_c)
+xpress_compress(const void *restrict in, size_t in_nbytes,
+ void *restrict out, size_t out_nbytes_avail, void *restrict _c)
{
struct xpress_compressor *c = _c;
+ /* Don't bother trying to compress very small inputs. */
+ if (in_nbytes < 25)
+ return 0;
+
if (out_nbytes_avail <= XPRESS_NUM_SYMBOLS / 2 + 4)
return 0;
{
struct xpress_compressor *c = _c;
- if (c) {
- #if SUPPORT_NEAR_OPTIMAL_PARSING
- if (c->impl == xpress_compress_near_optimal) {
- FREE(c->optimum_nodes);
- FREE(c->match_cache);
- } else
- #endif
- FREE(c->chosen_items);
- ALIGNED_FREE(c);
- }
+#if SUPPORT_NEAR_OPTIMAL_PARSING
+ if (c->impl == xpress_compress_near_optimal) {
+ FREE(c->optimum_nodes);
+ FREE(c->match_cache);
+ } else
+#endif
+ FREE(c->chosen_items);
+ FREE(c);
}
const struct compressor_ops xpress_compressor_ops = {