X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fxpress-compress.c;h=14cf2fcc8b5ec4ef76c8434918b7a969249d3975;hp=228ac51e623b166aeeb58d653b746d881c9d6e3c;hb=899c6ea158c7df7d35000f3afe448acff7f2bc23;hpb=394751ae13025edab605cd61c8e32819e3fb33a1 diff --git a/src/xpress-compress.c b/src/xpress-compress.c index 228ac51e..14cf2fcc 100644 --- a/src/xpress-compress.c +++ b/src/xpress-compress.c @@ -1,13 +1,12 @@ /* * xpress-compress.c * - * XPRESS compression routines. - * - * See the comments in xpress-decompress.c about the XPRESS format. + * A compressor that produces output compatible with the XPRESS (Huffman + * version) compression format. */ /* - * Copyright (C) 2012, 2013 Eric Biggers + * Copyright (C) 2012, 2013, 2014 Eric Biggers * * This file is part of wimlib, a library for working with WIM files. * @@ -29,57 +28,120 @@ # include "config.h" #endif -#include "wimlib.h" -#include "wimlib/assert.h" #include "wimlib/compressor_ops.h" #include "wimlib/compress_common.h" #include "wimlib/error.h" -#include "wimlib/lz_hash.h" +#include "wimlib/lz_mf.h" #include "wimlib/util.h" #include "wimlib/xpress.h" #include -struct xpress_record_ctx { - input_idx_t freqs[XPRESS_NUM_SYMBOLS]; - struct xpress_match *matches; +#define XPRESS_CACHE_PER_POS 8 +#define XPRESS_OPTIM_ARRAY_LENGTH 4096 + +struct xpress_compressor; +struct xpress_item; +struct xpress_mc_pos_data; + +struct xpress_compressor_params { + + /* Only used when choose_items_func == xpress_choose_items_near_optimal */ + u32 num_optim_passes; + + /* Given the data to compress (c->cur_window, c->cur_window_size), + * 'choose_items_func' fills in c->chosen_items with the intermediate + * representation of the match/literal sequence to output. Also fills + * in c->codewords and c->lens to provide the Huffman code with which + * these items should be output. + * + * Returns the number of items written to c->chosen_items. This can be + * at most c->cur_window_size. (The worst case is all literals, no + * matches.) */ + u32 (*choose_items_func)(struct xpress_compressor *c); + + /* Match-finding algorithm and parameters */ + enum lz_mf_algo mf_algo; + u32 max_search_depth; + u32 nice_match_length; }; +/* XPRESS compressor state. */ struct xpress_compressor { - u8 *window; - u32 max_window_size; - struct xpress_match *matches; - input_idx_t *prev_tab; + + /* Parameters determined based on the compression level. */ + struct xpress_compressor_params params; + + /* Lempel-Ziv match-finder */ + struct lz_mf *mf; + + /* Optimal parsing data */ + unsigned (*get_matches_func)(struct xpress_compressor *, + const struct lz_match **); + void (*skip_bytes_func)(struct xpress_compressor *, u32 n); + const u8 *cur_window_ptr; + struct lz_match *cached_matches; + struct lz_match *cache_ptr; + struct lz_match *cache_limit; + struct xpress_mc_pos_data *optimum; + unsigned optimum_cur_idx; + unsigned optimum_end_idx; + u8 costs[XPRESS_NUM_SYMBOLS]; + + /* The selected sequence of matches/literals */ + struct xpress_item *chosen_items; + + /* Data currently being compressed */ + const u8 *cur_window; + u32 cur_window_size; + + /* Symbol frequency counters */ + u32 freqs[XPRESS_NUM_SYMBOLS]; + + /* The current Huffman code */ u32 codewords[XPRESS_NUM_SYMBOLS]; u8 lens[XPRESS_NUM_SYMBOLS]; - struct xpress_record_ctx record_ctx; +}; + +/* Match-chooser position data. + * See corresponding declaration in lzx-compress.c for more information. */ +struct xpress_mc_pos_data { + u32 cost; +#define MC_INFINITE_COST ((u32)~0UL) + + union { + struct { + u32 link; + u32 match_offset; + } prev; + struct { + u32 link; + u32 match_offset; + } next; + }; }; /* Intermediate XPRESS match/literal representation. */ -struct xpress_match { +struct xpress_item { u16 adjusted_len; /* Match length minus XPRESS_MIN_MATCH_LEN */ u16 offset; /* Match offset */ - /* For literals, offset == 0 and adjusted_len is the literal. */ + /* For literals, offset == 0 and adjusted_len is the literal byte. */ }; -/* - * Writes @match, which is a match given in the intermediate representation for - * XPRESS matches, to the output stream @ostream. - * - * @codewords and @lens provide the Huffman code that is being used. - */ +/* Output an XPRESS match. */ static void -xpress_write_match(struct xpress_match match, - struct output_bitstream *restrict ostream, - const u32 codewords[restrict], - const u8 lens[restrict]) +xpress_write_match(struct xpress_item match, struct output_bitstream *ostream, + const u32 codewords[], const u8 lens[]) { - u8 len_hdr = min(match.adjusted_len, 0xf); - u8 offset_bsr = bsr32(match.offset); + unsigned len_hdr = min(match.adjusted_len, 0xf); + unsigned offset_bsr = bsr32(match.offset); unsigned sym = XPRESS_NUM_CHARS + ((offset_bsr << 4) | len_hdr); + /* Huffman symbol */ bitstream_put_bits(ostream, codewords[sym], lens[sym]); + /* If length >= 18, one extra length byte. + * If length >= 273, three (total) extra length bytes. */ if (match.adjusted_len >= 0xf) { u8 byte1 = min(match.adjusted_len - 0xf, 0xff); bitstream_put_byte(ostream, byte1); @@ -88,218 +150,900 @@ xpress_write_match(struct xpress_match match, bitstream_put_byte(ostream, match.adjusted_len >> 8); } } + + /* Offset bits */ bitstream_put_bits(ostream, match.offset ^ (1U << offset_bsr), offset_bsr); } +/* Output a sequence of XPRESS matches and literals. */ static void -xpress_write_matches_and_literals(struct output_bitstream *ostream, - const struct xpress_match matches[restrict], - input_idx_t num_matches, - const u32 codewords[restrict], - const u8 lens[restrict]) +xpress_write_items(struct output_bitstream *ostream, + const struct xpress_item items[], u32 num_items, + const u32 codewords[], const u8 lens[]) { - for (input_idx_t i = 0; i < num_matches; i++) { - if (matches[i].offset) { - /* Real match */ - xpress_write_match(matches[i], ostream, codewords, lens); + for (u32 i = 0; i < num_items; i++) { + if (items[i].offset) { + /* Match */ + xpress_write_match(items[i], ostream, codewords, lens); } else { - /* Literal byte */ - u8 lit = matches[i].adjusted_len; + /* Literal */ + unsigned lit = items[i].adjusted_len; bitstream_put_bits(ostream, codewords[lit], lens[lit]); } } + /* End-of-data symbol (required for MS compatibility) */ bitstream_put_bits(ostream, codewords[XPRESS_END_OF_DATA], lens[XPRESS_END_OF_DATA]); } +/* Make the Huffman code for XPRESS. + * + * Takes as input c->freqs and produces as output c->lens and c->codewords. */ static void -xpress_record_literal(u8 lit, void *_ctx) +xpress_make_huffman_code(struct xpress_compressor *c) { - struct xpress_record_ctx *ctx = _ctx; - ctx->freqs[lit]++; - *(ctx->matches++) = (struct xpress_match) { .offset = 0, .adjusted_len = lit }; + make_canonical_huffman_code(XPRESS_NUM_SYMBOLS, XPRESS_MAX_CODEWORD_LEN, + c->freqs, c->lens, c->codewords); } -static void -xpress_record_match(unsigned len, unsigned offset, void *_ctx) +/* Account for the Huffman symbol that would be produced by outputting the + * specified literal. Returns the intermediate representation of the literal. + */ +static inline struct xpress_item +xpress_tally_literal(u8 lit, u32 freqs[]) { - struct xpress_record_ctx *ctx = _ctx; - - XPRESS_ASSERT(len >= XPRESS_MIN_MATCH_LEN); - XPRESS_ASSERT(len <= XPRESS_MAX_MATCH_LEN); - XPRESS_ASSERT(offset >= XPRESS_MIN_OFFSET); - XPRESS_ASSERT(offset <= XPRESS_MAX_OFFSET); + freqs[lit]++; + return (struct xpress_item) { .offset = 0, .adjusted_len = lit }; +} - unsigned adjusted_len = len - XPRESS_MIN_MATCH_LEN; +/* Account for the Huffman symbol that would be produced by outputting the + * specified match. Returns the intermediate representation of the match. */ +static inline struct xpress_item +xpress_tally_match(u32 len, u32 offset, u32 freqs[]) +{ + u32 adjusted_len = len - XPRESS_MIN_MATCH_LEN; unsigned len_hdr = min(adjusted_len, 0xf); unsigned sym = XPRESS_NUM_CHARS + ((bsr32(offset) << 4) | len_hdr); - XPRESS_ASSERT(sym >= XPRESS_NUM_CHARS); - XPRESS_ASSERT(sym < XPRESS_NUM_SYMBOLS); + freqs[sym]++; + return (struct xpress_item) { .offset = offset, + .adjusted_len = adjusted_len }; +} + +static unsigned +xpress_get_matches_fillcache(struct xpress_compressor *c, + const struct lz_match **matches_ret) +{ + struct lz_match *cache_ptr; + struct lz_match *matches; + unsigned num_matches; - ctx->freqs[sym]++; - *(ctx->matches++) = (struct xpress_match) { .offset = offset, - .adjusted_len = adjusted_len }; + cache_ptr = c->cache_ptr; + matches = cache_ptr + 1; + if (likely(cache_ptr <= c->cache_limit)) { + num_matches = lz_mf_get_matches(c->mf, matches); + cache_ptr->len = num_matches; + c->cache_ptr = matches + num_matches; + } else { + num_matches = 0; + } + c->cur_window_ptr++; + *matches_ret = matches; + return num_matches; } -static const struct lz_params xpress_lz_params = { - .min_match = XPRESS_MIN_MATCH_LEN, - .max_match = XPRESS_MAX_MATCH_LEN, - .max_offset = XPRESS_MAX_OFFSET, - .good_match = 16, - .nice_match = 32, - .max_chain_len = 16, - .max_lazy_match = 16, - .too_far = 4096, -}; +static unsigned +xpress_get_matches_usecache(struct xpress_compressor *c, + const struct lz_match **matches_ret) +{ + struct lz_match *cache_ptr; + struct lz_match *matches; + unsigned num_matches; -static size_t -xpress_compress(const void *uncompressed_data, size_t uncompressed_size, - void *compressed_data, size_t compressed_size_avail, void *_c) + cache_ptr = c->cache_ptr; + matches = cache_ptr + 1; + if (likely(cache_ptr <= c->cache_limit)) { + num_matches = cache_ptr->len; + c->cache_ptr = matches + num_matches; + } else { + num_matches = 0; + } + c->cur_window_ptr++; + *matches_ret = matches; + return num_matches; +} + +static unsigned +xpress_get_matches_usecache_nocheck(struct xpress_compressor *c, + const struct lz_match **matches_ret) { - struct xpress_compressor *c = _c; - u8 *cptr = compressed_data; - struct output_bitstream ostream; - input_idx_t num_matches; - input_idx_t i; - size_t compressed_size; + struct lz_match *cache_ptr; + struct lz_match *matches; + unsigned num_matches; - /* XPRESS requires 256 bytes of overhead for the Huffman code, so it's - * impossible to compress 256 bytes or less of data to less than the - * input size. - * - * +1 to take into account that the buffer for compressed data is 1 byte - * smaller than the buffer for uncompressed data. - * - * +4 to take into account that init_output_bitstream() requires at - * least 4 bytes of data. */ - if (compressed_size_avail < XPRESS_NUM_SYMBOLS / 2 + 1 + 4) - return 0; + cache_ptr = c->cache_ptr; + matches = cache_ptr + 1; + num_matches = cache_ptr->len; + c->cache_ptr = matches + num_matches; + c->cur_window_ptr++; + *matches_ret = matches; + return num_matches; +} - /* Copy the data to a temporary buffer, but only to avoid - * inconsequential accesses of uninitialized memory in - * lz_analyze_block(). */ - memcpy(c->window, uncompressed_data, uncompressed_size); - memset(c->window + uncompressed_size, 0, 8); +static unsigned +xpress_get_matches_noncaching(struct xpress_compressor *c, + const struct lz_match **matches_ret) +{ + c->cur_window_ptr++; + *matches_ret = c->cached_matches; + return lz_mf_get_matches(c->mf, c->cached_matches); +} - /* Determine match/literal sequence to divide the data into. */ - memset(c->record_ctx.freqs, 0, sizeof(c->record_ctx.freqs)); - c->record_ctx.matches = c->matches; - lz_analyze_block(c->window, - uncompressed_size, - xpress_record_match, - xpress_record_literal, - &c->record_ctx, - &xpress_lz_params, - c->prev_tab); - - num_matches = (c->record_ctx.matches - c->matches); - - /* Account for end of data symbol. */ - c->record_ctx.freqs[XPRESS_END_OF_DATA]++; - - /* Build the Huffman code. */ - make_canonical_huffman_code(XPRESS_NUM_SYMBOLS, XPRESS_MAX_CODEWORD_LEN, - c->record_ctx.freqs, c->lens, c->codewords); +/* + * Find matches at the next position in the window. + * + * Returns the number of matches found and sets *matches_ret to point to the + * matches array. The matches will be sorted by strictly increasing length and + * offset. + */ +static inline unsigned +xpress_get_matches(struct xpress_compressor *c, + const struct lz_match **matches_ret) +{ + return (*c->get_matches_func)(c, matches_ret); +} - /* Output the Huffman code as a series of 512 4-bit lengths. */ - for (i = 0; i < XPRESS_NUM_SYMBOLS; i += 2) - *cptr++ = (c->lens[i] & 0xf) | (c->lens[i + 1] << 4); +static void +xpress_skip_bytes_fillcache(struct xpress_compressor *c, u32 n) +{ + struct lz_match *cache_ptr; - /* Output the encoded matches/literals. */ - init_output_bitstream(&ostream, cptr, - compressed_size_avail - XPRESS_NUM_SYMBOLS / 2 - 1); + c->cur_window_ptr += n; + cache_ptr = c->cache_ptr; + lz_mf_skip_positions(c->mf, n); + if (likely(cache_ptr <= c->cache_limit)) { + do { + cache_ptr->len = 0; + cache_ptr += 1; + } while (--n && likely(cache_ptr <= c->cache_limit)); + } + c->cache_ptr = cache_ptr; +} - xpress_write_matches_and_literals(&ostream, c->matches, - num_matches, c->codewords, c->lens); +static void +xpress_skip_bytes_usecache(struct xpress_compressor *c, u32 n) +{ + struct lz_match *cache_ptr; - /* Flush any pending data and get the length of the compressed data. */ - compressed_size = flush_output_bitstream(&ostream); - if (compressed_size == ~(input_idx_t)0) - return 0; + c->cur_window_ptr += n; + cache_ptr = c->cache_ptr; + if (likely(cache_ptr <= c->cache_limit)) { + do { + cache_ptr += 1 + cache_ptr->len; + } while (--n && likely(cache_ptr <= c->cache_limit)); + } + c->cache_ptr = cache_ptr; +} - compressed_size += XPRESS_NUM_SYMBOLS / 2; +static void +xpress_skip_bytes_usecache_nocheck(struct xpress_compressor *c, u32 n) +{ + struct lz_match *cache_ptr; + + c->cur_window_ptr += n; + cache_ptr = c->cache_ptr; + do { + cache_ptr += 1 + cache_ptr->len; + } while (--n); + c->cache_ptr = cache_ptr; +} + +static void +xpress_skip_bytes_noncaching(struct xpress_compressor *c, u32 n) +{ + c->cur_window_ptr += n; + lz_mf_skip_positions(c->mf, n); +} + +/* + * Skip the specified number of positions in the window (don't search for + * matches at them). + */ +static inline void +xpress_skip_bytes(struct xpress_compressor *c, u32 n) +{ + return (*c->skip_bytes_func)(c, n); +} + +/* + * Returns the cost, in bits, required to output the literal from the previous + * window position (the position at which matches were last searched). + */ +static inline u32 +xpress_prev_literal_cost(const struct xpress_compressor *c) +{ + return c->costs[*(c->cur_window_ptr - 1)]; +} + +/* + * Reverse the linked list of near-optimal matches so that they can be returned + * in forwards order. + * + * Returns the first match in the list. + */ +static struct lz_match +xpress_match_chooser_reverse_list(struct xpress_compressor *c, unsigned cur_pos) +{ + unsigned prev_link, saved_prev_link; + u32 prev_match_offset, saved_prev_match_offset; + + c->optimum_end_idx = cur_pos; + + saved_prev_link = c->optimum[cur_pos].prev.link; + saved_prev_match_offset = c->optimum[cur_pos].prev.match_offset; + + do { + prev_link = saved_prev_link; + prev_match_offset = saved_prev_match_offset; + + saved_prev_link = c->optimum[prev_link].prev.link; + saved_prev_match_offset = c->optimum[prev_link].prev.match_offset; + + c->optimum[prev_link].next.link = cur_pos; + c->optimum[prev_link].next.match_offset = prev_match_offset; + + cur_pos = prev_link; + } while (cur_pos != 0); + + c->optimum_cur_idx = c->optimum[0].next.link; + + return (struct lz_match) + { .len = c->optimum_cur_idx, + .offset = c->optimum[0].next.match_offset, + }; +} + +/* + * Near-optimal parsing. + * + * This does a forward lowest-cost path search. The search is terminated when a + * sufficiently long match is found, when the search reaches a position with no + * alternatives, or when the temporary 'optimum' array fills up. After + * termination of the search, matches/literals will be returned one by one by + * successive calls to this function. Once all the matches/literals are used + * up, the next call to this function will begin a new search. + */ +static struct lz_match +xpress_choose_near_optimal_item(struct xpress_compressor *c) +{ + const struct lz_match *matches; + unsigned num_matches; + struct lz_match match; + unsigned cur_pos; + unsigned end_pos; + struct xpress_mc_pos_data * const optimum = c->optimum; + + if (c->optimum_cur_idx != c->optimum_end_idx) { + /* Return previously computed match or literal. */ + match.len = optimum[c->optimum_cur_idx].next.link - + c->optimum_cur_idx; + match.offset = optimum[c->optimum_cur_idx].next.match_offset; + + c->optimum_cur_idx = optimum[c->optimum_cur_idx].next.link; + return match; + } + + c->optimum_cur_idx = 0; + c->optimum_end_idx = 0; + + num_matches = xpress_get_matches(c, &matches); + + if (num_matches == 0) + return (struct lz_match) {}; + + if (matches[num_matches - 1].len >= c->params.nice_match_length) { + /* Take the long match immediately. */ + xpress_skip_bytes(c, matches[num_matches - 1].len - 1); + return matches[num_matches - 1]; + } + + /* Consider coding a literal. */ + optimum[1].cost = xpress_prev_literal_cost(c); + optimum[1].prev.link = 0; + + optimum[2].cost = MC_INFINITE_COST; -#if defined(ENABLE_XPRESS_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION) - /* Verify that we really get the same thing back when decompressing. */ { - struct wimlib_decompressor *decompressor; + /* Consider coding a match. Cost evaluation is hand-inlined so + * that we can do some performance hacks. */ - if (0 == wimlib_create_decompressor(WIMLIB_COMPRESSION_TYPE_XPRESS, - c->max_window_size, - NULL, - &decompressor)) - { - int ret; - ret = wimlib_decompress(compressed_data, - compressed_size, - c->window, - uncompressed_size, - decompressor); - wimlib_free_decompressor(decompressor); - - if (ret) { - ERROR("Failed to decompress data we " - "compressed using XPRESS algorithm"); - wimlib_assert(0); - return 0; + unsigned i = 0; + unsigned len = 3; + struct xpress_mc_pos_data *optimum_ptr = &optimum[len]; + + if (matches[num_matches - 1].len < 0xf + XPRESS_MIN_MATCH_LEN) { + do { + u32 offset = matches[i].offset; + u32 offset_bsr = bsr32(offset); + unsigned len_hdr = len - XPRESS_MIN_MATCH_LEN; + unsigned sym = XPRESS_NUM_CHARS + + ((offset_bsr << 4) | len_hdr); + do { + optimum_ptr->prev.link = 0; + optimum_ptr->prev.match_offset = offset; + optimum_ptr->cost = offset_bsr + c->costs[sym]; + sym++; + optimum_ptr++; + } while (++len <= matches[i].len); + } while (++i != num_matches); + } else { + do { + u32 offset = matches[i].offset; + u32 offset_bsr = bsr32(offset); + do { + u32 adjusted_len = len - XPRESS_MIN_MATCH_LEN; + unsigned len_hdr = min(adjusted_len, 0xf); + unsigned sym = XPRESS_NUM_CHARS + + ((offset_bsr << 4) | len_hdr); + u32 cost = offset_bsr + c->costs[sym]; + if (adjusted_len >= 0xf) { + cost += 8; + if (adjusted_len - 0xf >= 0xff) + cost += 16; + } + + optimum_ptr->prev.link = 0; + optimum_ptr->prev.match_offset = offset; + optimum_ptr->cost = cost; + optimum_ptr++; + } while (++len <= matches[i].len); + } while (++i != num_matches); + } + } + + end_pos = matches[num_matches - 1].len; + cur_pos = 1; + do { + u32 cost; + u32 longest_len; + + num_matches = xpress_get_matches(c, &matches); + + if (num_matches) { + longest_len = matches[num_matches - 1].len; + if (longest_len >= c->params.nice_match_length) { + /* Take the long match immediately. */ + match = xpress_match_chooser_reverse_list(c, cur_pos); + + optimum[cur_pos].next.match_offset = + matches[num_matches - 1].offset; + optimum[cur_pos].next.link = cur_pos + longest_len; + c->optimum_end_idx = cur_pos + longest_len; + + xpress_skip_bytes(c, longest_len - 1); + + return match; } - if (memcmp(uncompressed_data, c->window, - uncompressed_size)) - { - ERROR("Data we compressed using XPRESS algorithm " - "didn't decompress to original"); - wimlib_assert(0); - return 0; + } else { + longest_len = 1; + } + + while (end_pos < cur_pos + longest_len) + optimum[++end_pos].cost = MC_INFINITE_COST; + + /* Consider coding a literal. */ + cost = optimum[cur_pos].cost + xpress_prev_literal_cost(c); + if (cost < optimum[cur_pos + 1].cost) { + optimum[cur_pos + 1].cost = cost; + optimum[cur_pos + 1].prev.link = cur_pos; + } + + if (num_matches) { + /* Consider coding a match. Cost evaluation is + * hand-inlined so that we can do some performance + * hacks. */ + unsigned i = 0; + unsigned len = 3; + struct xpress_mc_pos_data *optimum_ptr = &optimum[cur_pos + 3]; + u32 cur_cost = optimum[cur_pos].cost; + + if (matches[num_matches - 1].len < 0xf + XPRESS_MIN_MATCH_LEN) { + do { + u32 offset = matches[i].offset; + u32 offset_bsr = bsr32(offset); + unsigned len_hdr = len - XPRESS_MIN_MATCH_LEN; + unsigned sym = XPRESS_NUM_CHARS + + ((offset_bsr << 4) | len_hdr); + + u32 base_cost = cur_cost + offset_bsr; + do { + cost = base_cost + c->costs[sym]; + if (cost < optimum_ptr->cost) { + optimum_ptr->prev.link = cur_pos; + optimum_ptr->prev.match_offset = offset; + optimum_ptr->cost = cost; + } + sym++; + optimum_ptr++; + } while (++len <= matches[i].len); + } while (++i != num_matches); + } else { + do { + u32 offset = matches[i].offset; + u32 offset_bsr = bsr32(offset); + + u32 base_cost = cur_cost + offset_bsr; + do { + u32 adjusted_len = len - XPRESS_MIN_MATCH_LEN; + unsigned len_hdr = min(adjusted_len, 0xf); + unsigned sym = XPRESS_NUM_CHARS + + ((offset_bsr << 4) | len_hdr); + + cost = base_cost + c->costs[sym]; + if (adjusted_len >= 0xf) { + cost += 8; + if (adjusted_len - 0xf >= 0xff) + cost += 16; + } + + if (cost < optimum_ptr->cost) { + optimum_ptr->prev.link = cur_pos; + optimum_ptr->prev.match_offset = offset; + optimum_ptr->cost = cost; + } + optimum_ptr++; + } while (++len <= matches[i].len); + } while (++i != num_matches); } + } + + cur_pos++; + + } while (cur_pos != end_pos && cur_pos != XPRESS_OPTIM_ARRAY_LENGTH); + + return xpress_match_chooser_reverse_list(c, cur_pos); +} + +/* Set default XPRESS Huffman symbol costs to kick-start the iterative + * optimization algorithm. */ +static void +xpress_set_default_costs(u8 costs[]) +{ + unsigned i; + + for (i = 0; i < XPRESS_NUM_CHARS; i++) + costs[i] = 8; + + for (; i < XPRESS_NUM_SYMBOLS; i++) + costs[i] = 10; +} + +/* Copy the Huffman codeword lengths array @lens to the Huffman symbol costs + * array @costs, but also assign a default cost to each 0-length (unused) + * codeword. */ +static void +xpress_set_costs(u8 costs[], const u8 lens[]) +{ + for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS; i++) + costs[i] = lens[i] ? lens[i] : XPRESS_MAX_CODEWORD_LEN; +} + +/* Near-optimal parsing */ +static u32 +xpress_choose_items_near_optimal(struct xpress_compressor *c) +{ + u32 num_passes_remaining = c->params.num_optim_passes; + const u8 *window_ptr; + const u8 *window_end; + struct xpress_item *next_chosen_item; + struct lz_match raw_item; + struct xpress_item xpress_item; + + xpress_set_default_costs(c->costs); + c->optimum_cur_idx = 0; + c->optimum_end_idx = 0; + + if (c->params.num_optim_passes > 1) { + c->get_matches_func = xpress_get_matches_fillcache; + c->skip_bytes_func = xpress_skip_bytes_fillcache; + } else { + c->get_matches_func = xpress_get_matches_noncaching; + c->skip_bytes_func = xpress_skip_bytes_noncaching; + } + + lz_mf_load_window(c->mf, c->cur_window, c->cur_window_size); + + while (--num_passes_remaining) { + c->cur_window_ptr = c->cur_window; + window_ptr = c->cur_window; + window_end = window_ptr + c->cur_window_size; + c->cache_ptr = c->cached_matches; + memset(c->freqs, 0, sizeof(c->freqs)); + + while (window_ptr != window_end) { + raw_item = xpress_choose_near_optimal_item(c); + if (raw_item.len >= XPRESS_MIN_MATCH_LEN) { + xpress_tally_match(raw_item.len, + raw_item.offset, c->freqs); + window_ptr += raw_item.len; + } else { + xpress_tally_literal(*window_ptr, c->freqs); + window_ptr += 1; + } + } + c->freqs[XPRESS_END_OF_DATA]++; + xpress_make_huffman_code(c); + xpress_set_costs(c->costs, c->lens); + if (c->cache_ptr <= c->cache_limit) { + c->get_matches_func = xpress_get_matches_usecache_nocheck; + c->skip_bytes_func = xpress_skip_bytes_usecache_nocheck; } else { - WARNING("Failed to create decompressor for " - "data verification!"); + c->get_matches_func = xpress_get_matches_usecache; + c->skip_bytes_func = xpress_skip_bytes_usecache; } } -#endif - return compressed_size; + c->cur_window_ptr = c->cur_window; + window_ptr = c->cur_window; + window_end = window_ptr + c->cur_window_size; + c->cache_ptr = c->cached_matches; + memset(c->freqs, 0, sizeof(c->freqs)); + next_chosen_item = c->chosen_items; + + u32 unseen_cost = 9; + while (window_ptr != window_end) { + raw_item = xpress_choose_near_optimal_item(c); + if (raw_item.len >= XPRESS_MIN_MATCH_LEN) { + xpress_item = xpress_tally_match(raw_item.len, + raw_item.offset, + c->freqs); + window_ptr += raw_item.len; + } else { + xpress_item = xpress_tally_literal(*window_ptr, + c->freqs); + window_ptr += 1; + } + *next_chosen_item++ = xpress_item; + + /* When doing one-pass near-optimal parsing, rebuild the Huffman + * code occasionally. */ + if (unlikely((next_chosen_item - c->chosen_items) % 2048 == 0) && + c->cur_window_size >= 16384 && + c->params.num_optim_passes == 1) + { + xpress_make_huffman_code(c); + for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS; i++) + c->costs[i] = c->lens[i] ? c->lens[i] : unseen_cost; + if (unseen_cost < 15) + unseen_cost++; + } + } + c->freqs[XPRESS_END_OF_DATA]++; + xpress_make_huffman_code(c); + return next_chosen_item - c->chosen_items; +} + +/* Lazy parsing */ +static u32 +xpress_choose_items_lazy(struct xpress_compressor *c) +{ + struct lz_mf *mf; + u32 len_3_too_far; + const u8 *window_ptr; + const u8 *window_end; + u32 num_matches; + struct lz_match matches[min(c->params.nice_match_length, c->params.max_search_depth)]; + struct xpress_item *next_chosen_item; + struct lz_match prev_match; + + mf = c->mf; + + lz_mf_load_window(mf, c->cur_window, c->cur_window_size); + + if (c->cur_window_size <= 8192) + len_3_too_far = 2048; + else + len_3_too_far = 4096; + + memset(c->freqs, 0, sizeof(c->freqs)); + + window_ptr = c->cur_window; + window_end = c->cur_window + c->cur_window_size; + next_chosen_item = c->chosen_items; + + for (;;) { + + /* Don't have match at previous position */ + + num_matches = lz_mf_get_matches(mf, matches); + window_ptr++; + + if (num_matches == 0 || + (matches[num_matches - 1].len == 3 && + matches[num_matches - 1].offset >= len_3_too_far)) + { + /* No matches found => output literal */ + *next_chosen_item++ = xpress_tally_literal(*(window_ptr - 1), + c->freqs); + if (window_ptr == window_end) + break; + continue; + } + + prev_match = matches[num_matches - 1]; + + have_prev_match: + /* Have match at previous position */ + + if (prev_match.len >= c->params.nice_match_length) { + /* Very long match found => output immediately */ + *next_chosen_item++ = xpress_tally_match(prev_match.len, + prev_match.offset, + c->freqs); + lz_mf_skip_positions(mf, prev_match.len - 1); + window_ptr += prev_match.len - 1; + if (window_ptr == window_end) + break; + continue; + } + + num_matches = lz_mf_get_matches(mf, matches); + window_ptr++; + + if (num_matches == 0 || + (matches[num_matches - 1].len <= prev_match.len)) + { + /* Next match is not longer => output previous match */ + *next_chosen_item++ = xpress_tally_match(prev_match.len, + prev_match.offset, + c->freqs); + lz_mf_skip_positions(mf, prev_match.len - 2); + window_ptr += prev_match.len - 2; + if (window_ptr == window_end) + break; + continue; + } + + /* Next match is longer => output literal */ + + *next_chosen_item++ = xpress_tally_literal(*(window_ptr - 2), + c->freqs); + + prev_match = matches[num_matches - 1]; + + goto have_prev_match; + } + + c->freqs[XPRESS_END_OF_DATA]++; + xpress_make_huffman_code(c); + return next_chosen_item - c->chosen_items; +} + +/* Greedy parsing */ +static u32 +xpress_choose_items_greedy(struct xpress_compressor *c) +{ + struct lz_mf *mf; + u32 len_3_too_far; + const u8 *window_ptr; + const u8 *window_end; + struct lz_match matches[min(c->params.nice_match_length, c->params.max_search_depth)]; + u32 num_matches; + struct xpress_item *next_chosen_item; + + mf = c->mf; + + lz_mf_load_window(mf, c->cur_window, c->cur_window_size); + + if (c->cur_window_size <= 8192) + len_3_too_far = 2048; + else + len_3_too_far = 4096; + + memset(c->freqs, 0, sizeof(c->freqs)); + + window_ptr = c->cur_window; + window_end = c->cur_window + c->cur_window_size; + next_chosen_item = c->chosen_items; + + do { + /* Get longest match at the current position. */ + num_matches = lz_mf_get_matches(mf, matches); + + if (num_matches == 0 || + (matches[num_matches - 1].len == 3 && + matches[num_matches - 1].offset >= len_3_too_far)) + { + *next_chosen_item++ = xpress_tally_literal(*window_ptr, c->freqs); + window_ptr += 1; + } else { + u32 len = matches[num_matches - 1].len; + u32 offset = matches[num_matches - 1].offset; + + *next_chosen_item++ = xpress_tally_match(len, offset, c->freqs); + lz_mf_skip_positions(mf, len - 1); + window_ptr += len; + } + } while (window_ptr != window_end); + + c->freqs[XPRESS_END_OF_DATA]++; + xpress_make_huffman_code(c); + return next_chosen_item - c->chosen_items; +} + +/* Huffman-only parsing */ +static u32 +xpress_choose_items_huffonly(struct xpress_compressor *c) +{ + const u8 *window_ptr; + const u8 *window_end; + struct xpress_item *next_chosen_item; + + memset(c->freqs, 0, sizeof(c->freqs)); + + window_ptr = c->cur_window; + window_end = c->cur_window + c->cur_window_size; + next_chosen_item = c->chosen_items; + + do { + *next_chosen_item++ = xpress_tally_literal(*window_ptr++, c->freqs); + } while (window_ptr != window_end); + + c->freqs[XPRESS_END_OF_DATA]++; + xpress_make_huffman_code(c); + return next_chosen_item - c->chosen_items; } +/* Given the specified compression level and maximum window size, build the + * parameters to use for XPRESS compression. */ static void -xpress_free_compressor(void *_c) +xpress_build_params(unsigned int compression_level, u32 max_window_size, + struct xpress_compressor_params *xpress_params) { - struct xpress_compressor *c = _c; + memset(xpress_params, 0, sizeof(*xpress_params)); - if (c) { - FREE(c->window); - FREE(c->matches); - FREE(c->prev_tab); - FREE(c); + if (compression_level == 1) { + + /* Huffman only (no Lempel-Ziv matches) */ + xpress_params->mf_algo = LZ_MF_NULL; + xpress_params->choose_items_func = xpress_choose_items_huffonly; + + } else if (compression_level < 30) { + + /* Greedy parsing */ + xpress_params->mf_algo = LZ_MF_HASH_CHAINS; + xpress_params->choose_items_func = xpress_choose_items_greedy; + xpress_params->nice_match_length = compression_level; + xpress_params->max_search_depth = compression_level / 2; + + } else if (compression_level < 60) { + + /* Lazy parsing */ + xpress_params->mf_algo = LZ_MF_HASH_CHAINS; + xpress_params->choose_items_func = xpress_choose_items_lazy; + xpress_params->nice_match_length = compression_level; + xpress_params->max_search_depth = compression_level / 2; + + } else { + + /* Near-optimal parsing */ + xpress_params->choose_items_func = xpress_choose_items_near_optimal; + if (max_window_size >= 32768) + xpress_params->mf_algo = LZ_MF_BINARY_TREES; + else + xpress_params->mf_algo = LZ_MF_HASH_CHAINS; + xpress_params->num_optim_passes = compression_level / 40; + xpress_params->nice_match_length = min(compression_level / 2, + XPRESS_MAX_MATCH_LEN); + xpress_params->max_search_depth = min(compression_level, + XPRESS_MAX_MATCH_LEN); + } +} + +/* Given the specified XPRESS parameters and maximum window size, build the + * parameters to use for match-finding. */ +static void +xpress_build_mf_params(const struct xpress_compressor_params *xpress_params, + u32 max_window_size, struct lz_mf_params *mf_params) +{ + memset(mf_params, 0, sizeof(*mf_params)); + + mf_params->algorithm = xpress_params->mf_algo; + mf_params->max_window_size = max_window_size; + mf_params->min_match_len = XPRESS_MIN_MATCH_LEN; + mf_params->max_match_len = XPRESS_MAX_MATCH_LEN; + mf_params->max_search_depth = xpress_params->max_search_depth; + mf_params->nice_match_len = xpress_params->nice_match_length; +} + +static inline bool +xpress_window_size_valid(size_t window_size) +{ + return (window_size > 0 && window_size <= XPRESS_MAX_OFFSET + 1); +} + +static void +xpress_free_compressor(void *_c); + +static u64 +xpress_get_needed_memory(size_t max_window_size, unsigned int compression_level) +{ + u64 size = 0; + struct xpress_compressor_params params; + + if (!xpress_window_size_valid(max_window_size)) + return 0; + + xpress_build_params(compression_level, max_window_size, ¶ms); + + size += sizeof(struct xpress_compressor); + + size += lz_mf_get_needed_memory(params.mf_algo, max_window_size); + + if (params.choose_items_func == xpress_choose_items_near_optimal) { + size += (XPRESS_OPTIM_ARRAY_LENGTH + params.nice_match_length) * + sizeof(struct xpress_mc_pos_data); + if (params.num_optim_passes > 1) { + size_t cache_len = max(max_window_size * XPRESS_CACHE_PER_POS, + params.max_search_depth + 1); + size += cache_len * sizeof(struct lz_match); + } else { + size += params.max_search_depth * sizeof(struct lz_match); + } } + + size += max_window_size * sizeof(struct xpress_item); + + return size; } static int -xpress_create_compressor(size_t max_window_size, - const struct wimlib_compressor_params_header *params, +xpress_create_compressor(size_t max_window_size, unsigned int compression_level, void **c_ret) { struct xpress_compressor *c; + struct xpress_compressor_params params; + struct lz_mf_params mf_params; - if (max_window_size == 0 || max_window_size > (1U << 26)) + if (!xpress_window_size_valid(max_window_size)) return WIMLIB_ERR_INVALID_PARAM; - c = CALLOC(1, sizeof(struct xpress_compressor)); - if (c == NULL) - goto oom; + xpress_build_params(compression_level, max_window_size, ¶ms); + xpress_build_mf_params(¶ms, max_window_size, &mf_params); - c->window = MALLOC(max_window_size + 8); - if (c->window == NULL) + c = CALLOC(1, sizeof(struct xpress_compressor)); + if (!c) goto oom; - c->max_window_size = max_window_size; + c->params = params; - c->matches = MALLOC(max_window_size * sizeof(c->matches[0])); - if (c->matches == NULL) + c->mf = lz_mf_alloc(&mf_params); + if (!c->mf) goto oom; - c->prev_tab = MALLOC(max_window_size * sizeof(c->prev_tab[0])); - if (c->prev_tab == NULL) + if (params.choose_items_func == xpress_choose_items_near_optimal) { + c->optimum = MALLOC((XPRESS_OPTIM_ARRAY_LENGTH + + params.nice_match_length) * + sizeof(struct xpress_mc_pos_data)); + if (!c->optimum) + goto oom; + if (params.num_optim_passes > 1) { + size_t cache_len = max(max_window_size * XPRESS_CACHE_PER_POS, + params.max_search_depth + 1); + c->cached_matches = MALLOC(cache_len * sizeof(struct lz_match)); + if (!c->cached_matches) + goto oom; + c->cache_limit = c->cached_matches + cache_len - + (params.max_search_depth + 1); + } else { + c->cached_matches = MALLOC(params.max_search_depth * + sizeof(struct lz_match)); + if (!c->cached_matches) + goto oom; + } + } + + c->chosen_items = MALLOC(max_window_size * sizeof(struct xpress_item)); + if (!c->chosen_items) goto oom; *c_ret = c; @@ -310,18 +1054,65 @@ oom: return WIMLIB_ERR_NOMEM; } -static u64 -xpress_get_needed_memory(size_t max_window_size, - const struct wimlib_compressor_params_header *params) +static size_t +xpress_compress(const void *uncompressed_data, size_t uncompressed_size, + void *compressed_data, size_t compressed_size_avail, void *_c) { - u64 size = 0; + struct xpress_compressor *c = _c; + u32 num_chosen_items; + u8 *cptr; + struct output_bitstream ostream; + u32 compressed_size; - size += sizeof(struct xpress_compressor); - size += max_window_size + 8; - size += max_window_size * sizeof(((struct xpress_compressor*)0)->matches[0]); - size += max_window_size * sizeof(((struct xpress_compressor*)0)->prev_tab[0]); + /* XPRESS requires 256 bytes of overhead for the Huffman code, so it's + * impossible to compress 256 bytes or less of data to less than the + * input size. + * + * +1 to take into account that the buffer for compressed data is 1 byte + * smaller than the buffer for uncompressed data. + * + * +4 to take into account that init_output_bitstream() requires at + * least 4 bytes of data. */ + if (compressed_size_avail < XPRESS_NUM_SYMBOLS / 2 + 1 + 4) + return 0; - return size; + /* Determine match/literal sequence to divide the data into. */ + c->cur_window = uncompressed_data; + c->cur_window_size = uncompressed_size; + num_chosen_items = (*c->params.choose_items_func)(c); + + /* Output the Huffman code as a series of 512 4-bit lengths. */ + cptr = compressed_data; + for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS; i += 2) + *cptr++ = (c->lens[i] & 0xf) | (c->lens[i + 1] << 4); + + /* Output the encoded matches/literals. */ + init_output_bitstream(&ostream, cptr, + compressed_size_avail - XPRESS_NUM_SYMBOLS / 2 - 1); + xpress_write_items(&ostream, c->chosen_items, num_chosen_items, + c->codewords, c->lens); + + /* Flush any pending data and get the length of the compressed data. */ + compressed_size = flush_output_bitstream(&ostream); + if (compressed_size == (u32)~0UL) + return 0; + + /* Return the length of the compressed data. */ + return compressed_size + XPRESS_NUM_SYMBOLS / 2; +} + +static void +xpress_free_compressor(void *_c) +{ + struct xpress_compressor *c = _c; + + if (c) { + lz_mf_free(c->mf); + FREE(c->cached_matches); + FREE(c->optimum); + FREE(c->chosen_items); + FREE(c); + } } const struct compressor_ops xpress_compressor_ops = {