+
+/*
+ * Ask the currently selected match-choosing algorithm for the next match or
+ * literal to use.
+ *
+ * A returned 'struct lz_match' whose length is less than LZX_MIN_MATCH_LEN
+ * actually represents a literal.
+ */
+static inline struct lz_match
+lzx_choose_item(struct lzx_compressor *c)
+{
+	return c->params.choose_item_func(c);
+}
+
+/* Initialize the LZX Huffman symbol costs to fixed default values, for use
+ * before any real symbol statistics are available. */
+static void
+lzx_set_default_costs(struct lzx_costs * costs, unsigned num_main_syms)
+{
+	unsigned sym;
+
+	/* Main code: literal symbols first, then match header symbols. */
+	for (sym = 0; sym < LZX_NUM_CHARS; sym++)
+		costs->main[sym] = 8;
+	for (sym = LZX_NUM_CHARS; sym < num_main_syms; sym++)
+		costs->main[sym] = 10;
+
+	/* Length code symbols */
+	for (sym = 0; sym < LZX_LENCODE_NUM_SYMBOLS; sym++)
+		costs->len[sym] = 8;
+
+	/* Aligned offset code symbols */
+	for (sym = 0; sym < LZX_ALIGNEDCODE_NUM_SYMBOLS; sym++)
+		costs->aligned[sym] = 3;
+}
+
+/* Given the symbol frequencies of an LZX-compressed block and the
+ * corresponding Huffman codes, return whichever of LZX_BLOCKTYPE_ALIGNED or
+ * LZX_BLOCKTYPE_VERBATIM would take fewer bits to output. */
+static int
+lzx_choose_verbatim_or_aligned(const struct lzx_freqs * freqs,
+			       const struct lzx_codes * codes)
+{
+	/* A verbatim block spends a constant 3 bits per position footer.  An
+	 * aligned offset block instead spends one aligned offset codeword per
+	 * position footer, plus a fixed per-block overhead to transmit the
+	 * lengths that reconstruct the aligned offset code itself. */
+	unsigned aligned_bits = LZX_ALIGNEDCODE_ELEMENT_SIZE *
+				LZX_ALIGNEDCODE_NUM_SYMBOLS;
+	unsigned verbatim_bits = 0;
+
+	for (unsigned sym = 0; sym < LZX_ALIGNEDCODE_NUM_SYMBOLS; sym++) {
+		verbatim_bits += 3 * freqs->aligned[sym];
+		aligned_bits += codes->lens.aligned[sym] * freqs->aligned[sym];
+	}
+
+	return (aligned_bits < verbatim_bits) ? LZX_BLOCKTYPE_ALIGNED
+					      : LZX_BLOCKTYPE_VERBATIM;
+}
+
+/*
+ * Find a sequence of matches/literals with which to output the specified LZX
+ * block, then set the block's type to that which has the minimum cost to
+ * output (either verbatim or aligned).
+ *
+ * The parse is repeated c->params.num_optim_passes times.  The first pass
+ * fills the match cache (when more than one pass is configured); each later
+ * pass replays the cached matches under a cost model computed from the
+ * previous pass's symbol statistics.  Only the final pass records the chosen
+ * items into c->chosen_items.
+ */
+static void
+lzx_choose_items_for_block(struct lzx_compressor *c, struct lzx_block_spec *spec)
+{
+	/* Saved so the recent-offsets (LRU) queue can be rewound between
+	 * passes; each pass must start from the same queue state. */
+	const struct lzx_lru_queue orig_queue = c->queue;
+	u32 num_passes_remaining = c->params.num_optim_passes;
+	struct lzx_freqs freqs;
+	const u8 *window_ptr;
+	const u8 *window_end;
+	struct lzx_item *next_chosen_item;
+	struct lz_match lz_match;
+	struct lzx_item lzx_item;
+
+	LZX_ASSERT(num_passes_remaining >= 1);
+	LZX_ASSERT(lz_mf_get_position(c->mf) == spec->window_pos);
+
+	c->match_window_end = spec->window_pos + spec->block_size;
+
+	/* Select match-finder access functions for the first pass: the
+	 * "fillcache" variants save the matches for replay on later passes;
+	 * the "multiblock" variants additionally clamp matches at the block
+	 * boundary, which is unnecessary when this block is the whole window. */
+	if (c->params.num_optim_passes > 1) {
+		if (spec->block_size == c->cur_window_size)
+			c->get_matches_func = lzx_get_matches_fillcache_singleblock;
+		else
+			c->get_matches_func = lzx_get_matches_fillcache_multiblock;
+		c->skip_bytes_func = lzx_skip_bytes_fillcache;
+	} else {
+		if (spec->block_size == c->cur_window_size)
+			c->get_matches_func = lzx_get_matches_nocache_singleblock;
+		else
+			c->get_matches_func = lzx_get_matches_nocache_multiblock;
+		c->skip_bytes_func = lzx_skip_bytes_nocache;
+	}
+
+	/* The first optimal parsing pass is done using the cost model already
+	 * set in c->costs.  Each later pass is done using a cost model
+	 * computed from the previous pass.
+	 *
+	 * To improve performance we only generate the array containing the
+	 * matches and literals in intermediate form on the final pass. */
+
+	while (--num_passes_remaining) {
+		c->match_window_pos = spec->window_pos;
+		c->cache_ptr = c->cached_matches;
+		memset(&freqs, 0, sizeof(freqs));
+		window_ptr = &c->cur_window[spec->window_pos];
+		window_end = window_ptr + spec->block_size;
+
+		/* Parse the block, tallying symbol frequencies only. */
+		while (window_ptr != window_end) {
+
+			lz_match = lzx_choose_item(c);
+
+			LZX_ASSERT(!(lz_match.len == LZX_MIN_MATCH_LEN &&
+				     lz_match.offset == c->max_window_size -
+							LZX_MIN_MATCH_LEN));
+			if (lz_match.len >= LZX_MIN_MATCH_LEN) {
+				lzx_tally_match(lz_match.len, lz_match.offset,
+						&freqs, &c->queue);
+				window_ptr += lz_match.len;
+			} else {
+				lzx_tally_literal(*window_ptr, &freqs);
+				window_ptr += 1;
+			}
+		}
+		/* Build codes from this pass's statistics and derive the cost
+		 * model for the next pass from them. */
+		lzx_make_huffman_codes(&freqs, &spec->codes, c->num_main_syms);
+		lzx_set_costs(c, &spec->codes.lens, 15);
+		/* Rewind the queue; later passes replay the cached matches. */
+		c->queue = orig_queue;
+		if (c->cache_ptr <= c->cache_limit) {
+			c->get_matches_func = lzx_get_matches_usecache_nocheck;
+			c->skip_bytes_func = lzx_skip_bytes_usecache_nocheck;
+		} else {
+			/* Cache overflowed; later passes must bounds-check. */
+			c->get_matches_func = lzx_get_matches_usecache;
+			c->skip_bytes_func = lzx_skip_bytes_usecache;
+		}
+	}
+
+	/* Final pass: parse again, this time recording the chosen items. */
+	c->match_window_pos = spec->window_pos;
+	c->cache_ptr = c->cached_matches;
+	memset(&freqs, 0, sizeof(freqs));
+	window_ptr = &c->cur_window[spec->window_pos];
+	window_end = window_ptr + spec->block_size;
+
+	spec->chosen_items = &c->chosen_items[spec->window_pos];
+	next_chosen_item = spec->chosen_items;
+
+	unsigned unseen_cost = 9;
+	while (window_ptr != window_end) {
+
+		lz_match = lzx_choose_item(c);
+
+		LZX_ASSERT(!(lz_match.len == LZX_MIN_MATCH_LEN &&
+			     lz_match.offset == c->max_window_size -
+						LZX_MIN_MATCH_LEN));
+		if (lz_match.len >= LZX_MIN_MATCH_LEN) {
+			lzx_item.data = lzx_tally_match(lz_match.len,
+							lz_match.offset,
+							&freqs, &c->queue);
+			window_ptr += lz_match.len;
+		} else {
+			lzx_item.data = lzx_tally_literal(*window_ptr, &freqs);
+			window_ptr += 1;
+		}
+		*next_chosen_item++ = lzx_item;
+
+		/* When doing one-pass "near-optimal" parsing, update the cost
+		 * model occasionally. */
+		if (unlikely((next_chosen_item - spec->chosen_items) % 2048 == 0) &&
+		    c->params.choose_item_func == lzx_choose_near_optimal_item &&
+		    c->params.num_optim_passes == 1)
+		{
+			lzx_make_huffman_codes(&freqs, &spec->codes, c->num_main_syms);
+			lzx_set_costs(c, &spec->codes.lens, unseen_cost);
+			if (unseen_cost < 15)
+				unseen_cost++;
+		}
+	}
+	spec->num_chosen_items = next_chosen_item - spec->chosen_items;
+	lzx_make_huffman_codes(&freqs, &spec->codes, c->num_main_syms);
+	spec->block_type = lzx_choose_verbatim_or_aligned(&freqs, &spec->codes);
+}
+
+/* Split the input window into one or more LZX blocks and choose the
+ * match/literal sequence for each, leaving them ready to be output. */
+static void
+lzx_prepare_blocks(struct lzx_compressor *c)
+{
+	unsigned i;
+
+	/* The near-optimal parser needs an initial cost model before the
+	 * first pass has produced any statistics. */
+	if (c->params.choose_item_func == lzx_choose_near_optimal_item)
+		lzx_set_default_costs(&c->costs, c->num_main_syms);
+
+	/* Carve the window into fixed-size blocks.
+	 * TODO: The compression ratio could be slightly improved by performing
+	 * data-dependent block splitting instead of using fixed-size blocks.
+	 * Doing so well is a computationally hard problem, however. */
+	c->num_blocks = DIV_ROUND_UP(c->cur_window_size, LZX_DIV_BLOCK_SIZE);
+	for (i = 0; i < c->num_blocks; i++) {
+		u32 pos = i * LZX_DIV_BLOCK_SIZE;
+
+		c->block_specs[i].window_pos = pos;
+		c->block_specs[i].block_size = min(c->cur_window_size - pos,
+						   LZX_DIV_BLOCK_SIZE);
+	}
+
+	/* Hand the window to the match-finder. */
+	lz_mf_load_window(c->mf, c->cur_window, c->cur_window_size);
+
+	/* Reset parser state, then choose the items for each block in turn. */
+	lzx_lru_queue_init(&c->queue);
+	c->optimum_cur_idx = 0;
+	c->optimum_end_idx = 0;
+	c->prev_match.len = 0;
+	for (i = 0; i < c->num_blocks; i++)
+		lzx_choose_items_for_block(c, &c->block_specs[i]);
+}
+
+/* Translate a generic compression level into concrete LZX compressor
+ * parameters. */
+static void
+lzx_build_params(unsigned int compression_level,
+		 u32 max_window_size,
+		 struct lzx_compressor_params *lzx_params)
+{
+	if (compression_level < 25) {
+		/* Fast levels: lazy parsing, single pass. */
+		lzx_params->choose_item_func = lzx_choose_lazy_item;
+		lzx_params->num_optim_passes = 1;
+		lzx_params->mf_algo = (max_window_size <= 262144) ?
+					LZ_MF_HASH_CHAINS : LZ_MF_BINARY_TREES;
+		lzx_params->min_match_length = 3;
+		lzx_params->nice_match_length = 25 + compression_level * 2;
+		lzx_params->max_search_depth = 25 + compression_level;
+	} else {
+		/* Slow levels: near-optimal parsing, possibly multiple passes. */
+		lzx_params->choose_item_func = lzx_choose_near_optimal_item;
+		lzx_params->num_optim_passes = compression_level / 20;
+		lzx_params->mf_algo = (max_window_size <= 32768 &&
+				       lzx_params->num_optim_passes == 1) ?
+					LZ_MF_HASH_CHAINS : LZ_MF_BINARY_TREES;
+		lzx_params->min_match_length = (compression_level < 45) ? 3 : 2;
+		lzx_params->nice_match_length = min(((u64)compression_level * 32) / 50,
+						    LZX_MAX_MATCH_LEN);
+		lzx_params->max_search_depth = min(((u64)compression_level * 50) / 50,
+						   LZX_MAX_MATCH_LEN);
+	}
+}
+
+/* Fill in the match-finder parameters implied by the chosen LZX compressor
+ * parameters. */
+static void
+lzx_build_mf_params(const struct lzx_compressor_params *lzx_params,
+		    u32 max_window_size, struct lz_mf_params *mf_params)
+{
+	/* Zero everything first so unset fields take their defaults. */
+	memset(mf_params, 0, sizeof(*mf_params));
+
+	mf_params->algorithm = lzx_params->mf_algo;
+	mf_params->max_window_size = max_window_size;
+
+	/* Match length and search constraints */
+	mf_params->min_match_len = lzx_params->min_match_length;
+	mf_params->max_match_len = LZX_MAX_MATCH_LEN;
+	mf_params->nice_match_len = lzx_params->nice_match_length;
+	mf_params->max_search_depth = lzx_params->max_search_depth;
+}
+
+/* Defined below; declared here for the error path of lzx_create_compressor(). */
+static void
+lzx_free_compressor(void *_c);
+
+/* Return an upper bound on the memory, in bytes, needed to create an LZX
+ * compressor for the given maximum block size and compression level, or 0 if
+ * the block size is unsupported. */
+static u64
+lzx_get_needed_memory(size_t max_block_size, unsigned int compression_level)
+{
+	struct lzx_compressor_params params;
+	u32 max_window_size;
+	u64 size;
+
+	if (lzx_get_window_order(max_block_size) == 0)
+		return 0;
+	max_window_size = max_block_size;
+
+	lzx_build_params(compression_level, max_window_size, &params);
+
+	/* Compressor structure itself */
+	size = sizeof(struct lzx_compressor);
+
+	/* Input window copy */
+	size += max_window_size;
+
+	/* Block specifications */
+	size += DIV_ROUND_UP(max_window_size, LZX_DIV_BLOCK_SIZE) *
+		sizeof(struct lzx_block_spec);
+
+	/* Chosen items */
+	size += max_window_size * sizeof(struct lzx_item);
+
+	/* Match-finder */
+	size += lz_mf_get_needed_memory(params.mf_algo, max_window_size);
+
+	/* Optimal parsing array (near-optimal parser only) */
+	if (params.choose_item_func == lzx_choose_near_optimal_item)
+		size += (LZX_OPTIM_ARRAY_LENGTH + params.nice_match_length) *
+			sizeof(struct lzx_mc_pos_data);
+
+	/* Match cache: full-size for multi-pass, one position otherwise */
+	if (params.num_optim_passes > 1)
+		size += LZX_CACHE_LEN * sizeof(struct lz_match);
+	else
+		size += LZX_MAX_MATCHES_PER_POS * sizeof(struct lz_match);
+
+	return size;
+}
+
+/*
+ * Create an LZX compressor for blocks up to max_block_size bytes at the given
+ * compression level.  On success, stores the new compressor in *c_ret and
+ * returns 0.  Returns WIMLIB_ERR_INVALID_PARAM if the block size or derived
+ * match-finder parameters are unsupported, or WIMLIB_ERR_NOMEM on allocation
+ * failure.  On failure, nothing is allocated (partial allocations are freed
+ * via lzx_free_compressor()).
+ */
+static int
+lzx_create_compressor(size_t max_block_size, unsigned int compression_level,
+		      void **c_ret)
+{
+	struct lzx_compressor *c;
+	struct lzx_compressor_params params;
+	struct lz_mf_params mf_params;
+	unsigned window_order;
+	u32 max_window_size;
+
+	window_order = lzx_get_window_order(max_block_size);
+	if (window_order == 0)
+		return WIMLIB_ERR_INVALID_PARAM;
+	max_window_size = max_block_size;
+
+	/* Derive compressor and match-finder parameters from the level. */
+	lzx_build_params(compression_level, max_window_size, &params);
+	lzx_build_mf_params(&params, max_window_size, &mf_params);
+	if (!lz_mf_params_valid(&mf_params))
+		return WIMLIB_ERR_INVALID_PARAM;
+
+	/* CALLOC so that lzx_free_compressor() can be called safely on any
+	 * partially initialized compressor (unset pointers are NULL). */
+	c = CALLOC(1, sizeof(struct lzx_compressor));
+	if (!c)
+		goto oom;
+
+	c->params = params;
+	c->num_main_syms = lzx_get_num_main_syms(window_order);
+	c->max_window_size = max_window_size;
+	c->window_order = window_order;
+
+	/* Buffer for the preprocessed copy of the input data */
+	c->cur_window = ALIGNED_MALLOC(max_window_size, 16);
+	if (!c->cur_window)
+		goto oom;
+
+	/* One block specification per fixed-size block */
+	c->block_specs = MALLOC(DIV_ROUND_UP(max_window_size,
+					     LZX_DIV_BLOCK_SIZE) *
+				sizeof(struct lzx_block_spec));
+	if (!c->block_specs)
+		goto oom;
+
+	/* Chosen matches/literals in intermediate form */
+	c->chosen_items = MALLOC(max_window_size * sizeof(struct lzx_item));
+	if (!c->chosen_items)
+		goto oom;
+
+	c->mf = lz_mf_alloc(&mf_params);
+	if (!c->mf)
+		goto oom;
+
+	/* Per-position data for the near-optimal parser only */
+	if (params.choose_item_func == lzx_choose_near_optimal_item) {
+		c->optimum = MALLOC((LZX_OPTIM_ARRAY_LENGTH +
+				     params.nice_match_length) *
+				    sizeof(struct lzx_mc_pos_data));
+		if (!c->optimum)
+			goto oom;
+	}
+
+	/* Match cache: full-size when later passes replay matches; otherwise
+	 * just enough for one position's matches. */
+	if (params.num_optim_passes > 1) {
+		c->cached_matches = MALLOC(LZX_CACHE_LEN *
+					   sizeof(struct lz_match));
+		if (!c->cached_matches)
+			goto oom;
+		c->cache_limit = c->cached_matches + LZX_CACHE_LEN -
+				 (LZX_MAX_MATCHES_PER_POS + 1);
+	} else {
+		c->cached_matches = MALLOC(LZX_MAX_MATCHES_PER_POS *
+					   sizeof(struct lz_match));
+		if (!c->cached_matches)
+			goto oom;
+	}
+
+	*c_ret = c;
+	return 0;
+
+oom:
+	lzx_free_compressor(c);
+	return WIMLIB_ERR_NOMEM;
+}
+
+/*
+ * Compress 'uncompressed_size' bytes of data into 'compressed_data', returning
+ * the compressed size, or 0 if the data was not compressed (too small, or the
+ * output would not fit in 'compressed_size_avail' bytes).
+ *
+ * NOTE(review): assumes uncompressed_size <= c->max_window_size (the memcpy
+ * below is unchecked) — presumably guaranteed by the caller via the
+ * max_block_size passed to lzx_create_compressor(); confirm at call sites.
+ */
+static size_t
+lzx_compress(const void *uncompressed_data, size_t uncompressed_size,
+	     void *compressed_data, size_t compressed_size_avail, void *_c)
+{
+	struct lzx_compressor *c = _c;
+	struct lzx_output_bitstream os;
+
+	/* Don't bother compressing very small inputs. */
+	if (uncompressed_size < 100)
+		return 0;
+
+	/* The input data must be preprocessed.  To avoid changing the original
+	 * input, copy it to a temporary buffer. */
+	memcpy(c->cur_window, uncompressed_data, uncompressed_size);
+	c->cur_window_size = uncompressed_size;
+
+	/* Preprocess the data (E8 call/jump target translation). */
+	lzx_do_e8_preprocessing(c->cur_window, c->cur_window_size);
+
+	/* Prepare the compressed data. */
+	lzx_prepare_blocks(c);
+
+	/* Generate the compressed data and return its size, or 0 if an
+	 * overflow occurred. */
+	lzx_init_output(&os, compressed_data, compressed_size_avail);
+	lzx_write_all_blocks(c, &os);
+	return lzx_flush_output(&os);
+}
+
+/* Free an LZX compressor created by lzx_create_compressor(), including any
+ * partially constructed one (unset members are NULL).  A NULL argument is a
+ * no-op. */
+static void
+lzx_free_compressor(void *_c)
+{
+	struct lzx_compressor *c = _c;
+
+	if (!c)
+		return;
+
+	FREE(c->cached_matches);
+	FREE(c->optimum);
+	lz_mf_free(c->mf);
+	FREE(c->chosen_items);
+	FREE(c->block_specs);
+	ALIGNED_FREE(c->cur_window);
+	FREE(c);
+}
+
+/* Operations table exposing this LZX compressor to the generic compression
+ * API. */
+const struct compressor_ops lzx_compressor_ops = {
+	.get_needed_memory = lzx_get_needed_memory,
+	.create_compressor = lzx_create_compressor,
+	.compress = lzx_compress,
+	.free_compressor = lzx_free_compressor,
+};