X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fxpress-compress.c;h=e1884978377f0ac6d1555282600e775aa8141ab2;hp=6eaa2e69bf9546f58c30de1b7966990247233b0d;hb=4dae2eef895b95d1c2bc1bf5fc17413af8cc7952;hpb=4dd45340f9fe3a533e0f1a9d6b79f8118e45ca2a

diff --git a/src/xpress-compress.c b/src/xpress-compress.c
index 6eaa2e69..e1884978 100644
--- a/src/xpress-compress.c
+++ b/src/xpress-compress.c
@@ -30,6 +30,7 @@
 
 #include "wimlib/compressor_ops.h"
 #include "wimlib/compress_common.h"
+#include "wimlib/endianness.h"
 #include "wimlib/error.h"
 #include "wimlib/lz_mf.h"
 #include "wimlib/util.h"
@@ -45,11 +46,25 @@ struct xpress_item;
 struct xpress_mc_pos_data;
 
 struct xpress_compressor_params {
-	struct lz_match (*choose_item_func)(struct xpress_compressor *);
+
+	/* Only used when choose_items_func == xpress_choose_items_near_optimal  */
 	u32 num_optim_passes;
+
+	/* Given the data to compress (c->cur_window, c->cur_window_size),
+	 * 'choose_items_func' fills in c->chosen_items with the intermediate
+	 * representation of the match/literal sequence to output.  Also fills
+	 * in c->codewords and c->lens to provide the Huffman code with which
+	 * these items should be output.
+	 *
+	 * Returns the number of items written to c->chosen_items.  This can be
+	 * at most c->cur_window_size.  (The worst case is all literals, no
+	 * matches.)  */
+	u32 (*choose_items_func)(struct xpress_compressor *c);
+
+	/* Match-finding algorithm and parameters  */
 	enum lz_mf_algo mf_algo;
-	u32 nice_match_length;
 	u32 max_search_depth;
+	u32 nice_match_length;
 };
 
 /* XPRESS compressor state.  */
@@ -58,37 +73,29 @@ struct xpress_compressor {
 	/* Parameters determined based on the compression level.  */
 	struct xpress_compressor_params params;
 
-	unsigned (*get_matches_func)(struct xpress_compressor *,
-				     const struct lz_match **);
-	void (*skip_bytes_func)(struct xpress_compressor *, u32 n);
-	u32 len_3_too_far;
-
-	/* Data currently being compressed  */
-	const u8 *cur_window;
-	u32 cur_window_size;
-
 	/* Lempel-Ziv match-finder  */
 	struct lz_mf *mf;
 
+	/* Optimal parsing data  */
+	unsigned (*get_matches_func)(struct xpress_compressor *,
+				     const struct lz_match **);
+	void (*skip_bytes_func)(struct xpress_compressor *, u32 n);
 	const u8 *cur_window_ptr;
-
-	/* Match cache, used when doing multiple optimization passes.  */
 	struct lz_match *cached_matches;
 	struct lz_match *cache_ptr;
 	struct lz_match *cache_limit;
-
-	/* Optimal parsing data  */
 	struct xpress_mc_pos_data *optimum;
 	unsigned optimum_cur_idx;
 	unsigned optimum_end_idx;
 	u8 costs[XPRESS_NUM_SYMBOLS];
 
-	/* Lazy parsing data  */
-	struct lz_match prev_match;
-
 	/* The selected sequence of matches/literals  */
 	struct xpress_item *chosen_items;
 
+	/* Data currently being compressed  */
+	const u8 *cur_window;
+	u32 cur_window_size;
+
 	/* Symbol frequency counters  */
 	u32 freqs[XPRESS_NUM_SYMBOLS];
 
@@ -122,9 +129,121 @@ struct xpress_item {
 	/* For literals, offset == 0 and adjusted_len is the literal byte.  */
 };
 
+/*
+ * Structure to keep track of the current state of sending data to the
+ * compressed output buffer.
+ *
+ * The XPRESS bitstream is encoded as a sequence of little endian 16-bit coding
+ * units interwoven with literal bytes.
+ */
+struct xpress_output_bitstream {
+
+	/* Bits that haven't yet been written to the output buffer.  */
+	u32 bitbuf;
+
+	/* Number of bits currently held in @bitbuf.  */
+	u32 bitcount;
+
+	/* Pointer to the start of the output buffer.  */
+	u8 *start;
+
+	/* Pointer to the location in the ouput buffer at which to write the
+	 * next 16 bits.  */
+	u8 *next_bits;
+
+	/* Pointer to the location in the output buffer at which to write the
+	 * next 16 bits, after @next_bits.  */
+	u8 *next_bits2;
+
+	/* Pointer to the location in the output buffer at which to write the
+	 * next literal byte.  */
+	u8 *next_byte;
+
+	/* Pointer to the end of the output buffer.  */
+	u8 *end;
+};
+
+/*
+ * Initialize the output bitstream.
+ *
+ * @os
+ *	The output bitstream structure to initialize.
+ * @buffer
+ *	The buffer to write to.
+ * @size
+ *	Size of @buffer, in bytes.  Must be at least 4.
+ */
+static void
+xpress_init_output(struct xpress_output_bitstream *os, void *buffer, u32 size)
+{
+	os->bitbuf = 0;
+	os->bitcount = 0;
+	os->start = buffer;
+	os->next_bits = os->start;
+	os->next_bits2 = os->start + 2;
+	os->next_byte = os->start + 4;
+	os->end = os->start + size;
+}
+
+/*
+ * Write some bits to the output bitstream.
+ *
+ * The bits are given by the low-order @num_bits bits of @bits.  Higher-order
+ * bits in @bits cannot be set.  At most 16 bits can be written at once.
+ *
+ * If the output buffer space is exhausted, then the bits will be ignored, and
+ * xpress_flush_output() will return 0 when it gets called.
+ */
+static _always_inline_attribute void
+xpress_write_bits(struct xpress_output_bitstream *os,
+		  const u32 bits, const unsigned int num_bits)
+{
+	/* This code is optimized for XPRESS, which never needs to write more
+	 * than 16 bits at once.  */
+
+	os->bitcount += num_bits;
+	os->bitbuf = (os->bitbuf << num_bits) | bits;
+
+	if (os->bitcount > 16) {
+		os->bitcount -= 16;
+		if (os->end - os->next_byte >= 2) {
+			*(le16 *)os->next_bits = cpu_to_le16(os->bitbuf >> os->bitcount);
+			os->next_bits = os->next_bits2;
+			os->next_bits2 = os->next_byte;
+			os->next_byte += 2;
+		}
+	}
+}
+
+/*
+ * Interweave a literal byte into the output bitstream.
+ */
+static _always_inline_attribute void
+xpress_write_byte(struct xpress_output_bitstream *os, u8 byte)
+{
+	if (os->next_byte < os->end)
+		*os->next_byte++ = byte;
+}
+
+/*
+ * Flush the last coding unit to the output buffer if needed.  Return the total
+ * number of bytes written to the output buffer, or 0 if an overflow occurred.
+ */
+static u32
+xpress_flush_output(struct xpress_output_bitstream *os)
+{
+	if (unlikely(os->end - os->next_byte < 2))
+		return 0;
+
+	*(le16 *)os->next_bits = cpu_to_le16(os->bitbuf << (16 - os->bitcount));
+	*(le16 *)os->next_bits2 = cpu_to_le16(0);
+
+	return os->next_byte - os->start;
+}
+
 /* Output an XPRESS match.  */
 static void
-xpress_write_match(struct xpress_item match, struct output_bitstream *ostream,
+xpress_write_match(struct xpress_item match, struct xpress_output_bitstream *os,
 		   const u32 codewords[], const u8 lens[])
 {
 	unsigned len_hdr = min(match.adjusted_len, 0xf);
@@ -132,41 +251,41 @@ xpress_write_match(struct xpress_item match, struct output_bitstream *ostream,
 	unsigned sym = XPRESS_NUM_CHARS + ((offset_bsr << 4) | len_hdr);
 
 	/* Huffman symbol  */
-	bitstream_put_bits(ostream, codewords[sym], lens[sym]);
+	xpress_write_bits(os, codewords[sym], lens[sym]);
 
 	/* If length >= 18, one extra length byte.
 	 * If length >= 273, three (total) extra length bytes.  */
 	if (match.adjusted_len >= 0xf) {
 		u8 byte1 = min(match.adjusted_len - 0xf, 0xff);
-		bitstream_put_byte(ostream, byte1);
+		xpress_write_byte(os, byte1);
 		if (byte1 == 0xff) {
-			bitstream_put_byte(ostream, match.adjusted_len & 0xff);
-			bitstream_put_byte(ostream, match.adjusted_len >> 8);
+			xpress_write_byte(os, match.adjusted_len & 0xff);
+			xpress_write_byte(os, match.adjusted_len >> 8);
 		}
 	}
 
 	/* Offset bits  */
-	bitstream_put_bits(ostream, match.offset ^ (1U << offset_bsr), offset_bsr);
+	xpress_write_bits(os, match.offset ^ (1U << offset_bsr), offset_bsr);
 }
 
 /* Output a sequence of XPRESS matches and literals.  */
 static void
-xpress_write_items(struct output_bitstream *ostream,
+xpress_write_items(struct xpress_output_bitstream *os,
 		   const struct xpress_item items[], u32 num_items,
 		   const u32 codewords[], const u8 lens[])
 {
 	for (u32 i = 0; i < num_items; i++) {
 		if (items[i].offset) {
 			/* Match  */
-			xpress_write_match(items[i], ostream, codewords, lens);
+			xpress_write_match(items[i], os, codewords, lens);
 		} else {
 			/* Literal  */
 			unsigned lit = items[i].adjusted_len;
-			bitstream_put_bits(ostream, codewords[lit], lens[lit]);
+			xpress_write_bits(os, codewords[lit], lens[lit]);
 		}
 	}
 	/* End-of-data symbol (required for MS compatibility)  */
-	bitstream_put_bits(ostream, codewords[XPRESS_END_OF_DATA], lens[XPRESS_END_OF_DATA]);
+	xpress_write_bits(os, codewords[XPRESS_END_OF_DATA], lens[XPRESS_END_OF_DATA]);
 }
 
 /* Make the Huffman code for XPRESS.
@@ -597,98 +716,6 @@ xpress_choose_near_optimal_item(struct xpress_compressor *c)
 	return xpress_match_chooser_reverse_list(c, cur_pos);
 }
 
-/* Lazy parsing.  */
-static struct lz_match
-xpress_choose_lazy_item(struct xpress_compressor *c)
-{
-	const struct lz_match *matches;
-	struct lz_match cur_match;
-	struct lz_match next_match;
-	u32 num_matches;
-
-	if (c->prev_match.len) {
-		cur_match = c->prev_match;
-		c->prev_match.len = 0;
-	} else {
-		num_matches = xpress_get_matches(c, &matches);
-		if (num_matches == 0 ||
-		    (matches[num_matches - 1].len == 3 &&
-		     matches[num_matches - 1].offset >= c->len_3_too_far))
-		{
-			cur_match.len = 0;
-			return cur_match;
-		}
-
-		/* With lazy parsing we only consider the longest match at each
-		 * position.  */
-		cur_match = matches[num_matches - 1];
-	}
-
-	if (cur_match.len >= c->params.nice_match_length) {
-		xpress_skip_bytes(c, cur_match.len - 1);
-		return cur_match;
-	}
-
-	num_matches = xpress_get_matches(c, &matches);
-	if (num_matches == 0 ||
-	    (matches[num_matches - 1].len == 3 &&
-	     matches[num_matches - 1].offset >= c->len_3_too_far))
-	{
-		xpress_skip_bytes(c, cur_match.len - 2);
-		return cur_match;
-	}
-
-	next_match = matches[num_matches - 1];
-
-	if (next_match.len <= cur_match.len) {
-		xpress_skip_bytes(c, cur_match.len - 2);
-		return cur_match;
-	} else {
-		/* Longer match at next position.  Choose a literal here so we
-		 * will get to use the longer match.  */
-		c->prev_match = next_match;
-		cur_match.len = 0;
-		return cur_match;
-	}
-}
-
-/* Greedy parsing.  */
-static struct lz_match
-xpress_choose_greedy_item(struct xpress_compressor *c)
-{
-	const struct lz_match *matches;
-	u32 num_matches;
-
-	num_matches = xpress_get_matches(c, &matches);
-	if (num_matches == 0 ||
-	    (matches[num_matches - 1].len == 3 &&
-	     matches[num_matches - 1].offset >= c->len_3_too_far))
-		return (struct lz_match) {};
-
-	xpress_skip_bytes(c, matches[num_matches - 1].len - 1);
-	return matches[num_matches - 1];
-}
-
-/* Always choose a literal.  */
-static struct lz_match
-xpress_choose_literal(struct xpress_compressor *c)
-{
-	return (struct lz_match) {};
-}
-
-/*
- * Return the next match or literal to use, delegating to the currently selected
- * match-choosing algorithm.
- *
- * If the length of the returned 'struct lz_match' is less than
- * XPRESS_MIN_MATCH_LEN, then it is really a literal.
- */
-static inline struct lz_match
-xpress_choose_item(struct xpress_compressor *c)
-{
-	return (*c->params.choose_item_func)(c);
-}
-
 /* Set default XPRESS Huffman symbol costs to kick-start the iterative
  * optimization algorithm.  */
 static void
@@ -713,17 +740,9 @@ xpress_set_costs(u8 costs[], const u8 lens[])
 		costs[i] = lens[i] ? lens[i] : XPRESS_MAX_CODEWORD_LEN;
 }
 
-/*
- * Given the data to compress (c->cur_window, c->cur_window_size), fills in
- * c->chosen_items with the intermediate representation of the match/literal
- * sequence to output.  Also fills in c->codewords and c->lens to provide the
- * Huffman code with which these items should be output.
- *
- * Returns the number of items written to c->chosen_items.  This can be at most
- * c->cur_window_size.  (The worst case is all literals, no matches.)
- */
+/* Near-optimal parsing  */
 static u32
-xpress_choose_items(struct xpress_compressor *c)
+xpress_choose_items_near_optimal(struct xpress_compressor *c)
 {
 	u32 num_passes_remaining = c->params.num_optim_passes;
 	const u8 *window_ptr;
@@ -732,17 +751,9 @@ xpress_choose_items(struct xpress_compressor *c)
 	struct lz_match raw_item;
 	struct xpress_item xpress_item;
 
-	if (c->params.choose_item_func == xpress_choose_near_optimal_item) {
-		xpress_set_default_costs(c->costs);
-		c->optimum_cur_idx = 0;
-		c->optimum_end_idx = 0;
-	} else {
-		c->prev_match.len = 0;
-		if (c->cur_window_size <= 8192)
-			c->len_3_too_far = 2048;
-		else
-			c->len_3_too_far = 4096;
-	}
+	xpress_set_default_costs(c->costs);
+	c->optimum_cur_idx = 0;
+	c->optimum_end_idx = 0;
 
 	if (c->params.num_optim_passes > 1) {
 		c->get_matches_func = xpress_get_matches_fillcache;
@@ -755,13 +766,14 @@ xpress_choose_items(struct xpress_compressor *c)
 	lz_mf_load_window(c->mf, c->cur_window, c->cur_window_size);
 
 	while (--num_passes_remaining) {
-		window_ptr = c->cur_window_ptr = c->cur_window;
+		c->cur_window_ptr = c->cur_window;
+		window_ptr = c->cur_window;
 		window_end = window_ptr + c->cur_window_size;
 		c->cache_ptr = c->cached_matches;
 		memset(c->freqs, 0, sizeof(c->freqs));
 
 		while (window_ptr != window_end) {
-			raw_item = xpress_choose_item(c);
+			raw_item = xpress_choose_near_optimal_item(c);
 			if (raw_item.len >= XPRESS_MIN_MATCH_LEN) {
 				xpress_tally_match(raw_item.len,
 						   raw_item.offset, c->freqs);
@@ -783,7 +795,8 @@ xpress_choose_items(struct xpress_compressor *c)
 		}
 	}
 
-	window_ptr = c->cur_window_ptr = c->cur_window;
+	c->cur_window_ptr = c->cur_window;
+	window_ptr = c->cur_window;
 	window_end = window_ptr + c->cur_window_size;
 	c->cache_ptr = c->cached_matches;
 	memset(c->freqs, 0, sizeof(c->freqs));
@@ -791,7 +804,7 @@ xpress_choose_items(struct xpress_compressor *c)
 
 	u32 unseen_cost = 9;
 	while (window_ptr != window_end) {
-		raw_item = xpress_choose_item(c);
+		raw_item = xpress_choose_near_optimal_item(c);
 		if (raw_item.len >= XPRESS_MIN_MATCH_LEN) {
 			xpress_item = xpress_tally_match(raw_item.len,
 							 raw_item.offset,
@@ -807,7 +820,6 @@ xpress_choose_items(struct xpress_compressor *c)
 		/* When doing one-pass near-optimal parsing, rebuild the Huffman
 		 * code occasionally.  */
 		if (unlikely((next_chosen_item - c->chosen_items) % 2048 == 0) &&
-		    c->params.choose_item_func == xpress_choose_near_optimal_item &&
 		    c->cur_window_size >= 16384 &&
 		    c->params.num_optim_passes == 1)
 		{
@@ -823,6 +835,177 @@ xpress_choose_items(struct xpress_compressor *c)
 	return next_chosen_item - c->chosen_items;
 }
 
+/* Lazy parsing  */
+static u32
+xpress_choose_items_lazy(struct xpress_compressor *c)
+{
+	struct lz_mf *mf;
+	u32 len_3_too_far;
+	const u8 *window_ptr;
+	const u8 *window_end;
+	u32 num_matches;
+	struct lz_match matches[min(c->params.nice_match_length, c->params.max_search_depth)];
+	struct xpress_item *next_chosen_item;
+	struct lz_match prev_match;
+
+	mf = c->mf;
+
+	lz_mf_load_window(mf, c->cur_window, c->cur_window_size);
+
+	if (c->cur_window_size <= 8192)
+		len_3_too_far = 2048;
+	else
+		len_3_too_far = 4096;
+
+	memset(c->freqs, 0, sizeof(c->freqs));
+
+	window_ptr = c->cur_window;
+	window_end = c->cur_window + c->cur_window_size;
+	next_chosen_item = c->chosen_items;
+
+	for (;;) {
+
+		/* Don't have match at previous position  */
+
+		num_matches = lz_mf_get_matches(mf, matches);
+		window_ptr++;
+
+		if (num_matches == 0 ||
+		    (matches[num_matches - 1].len == 3 &&
+		     matches[num_matches - 1].offset >= len_3_too_far))
+		{
+			/* No matches found => output literal  */
+			*next_chosen_item++ = xpress_tally_literal(*(window_ptr - 1),
+								   c->freqs);
+			if (window_ptr == window_end)
+				break;
+			continue;
+		}
+
+		prev_match = matches[num_matches - 1];
+
+	have_prev_match:
+		/* Have match at previous position  */
+
+		if (prev_match.len >= c->params.nice_match_length) {
+			/* Very long match found => output immediately  */
+			*next_chosen_item++ = xpress_tally_match(prev_match.len,
+								 prev_match.offset,
+								 c->freqs);
+			lz_mf_skip_positions(mf, prev_match.len - 1);
+			window_ptr += prev_match.len - 1;
+			if (window_ptr == window_end)
+				break;
+			continue;
+		}
+
+		num_matches = lz_mf_get_matches(mf, matches);
+		window_ptr++;
+
+		if (num_matches == 0 ||
+		    (matches[num_matches - 1].len <= prev_match.len))
+		{
+			/* Next match is not longer => output previous match  */
+			*next_chosen_item++ = xpress_tally_match(prev_match.len,
+								 prev_match.offset,
+								 c->freqs);
+			lz_mf_skip_positions(mf, prev_match.len - 2);
+			window_ptr += prev_match.len - 2;
+			if (window_ptr == window_end)
+				break;
+			continue;
+		}
+
+		/* Next match is longer => output literal  */
+
+		*next_chosen_item++ = xpress_tally_literal(*(window_ptr - 2),
+							   c->freqs);
+
+		prev_match = matches[num_matches - 1];
+
+		goto have_prev_match;
+	}
+
+	c->freqs[XPRESS_END_OF_DATA]++;
+	xpress_make_huffman_code(c);
+	return next_chosen_item - c->chosen_items;
+}
+
+/* Greedy parsing  */
+static u32
+xpress_choose_items_greedy(struct xpress_compressor *c)
+{
+	struct lz_mf *mf;
+	u32 len_3_too_far;
+	const u8 *window_ptr;
+	const u8 *window_end;
+	struct lz_match matches[min(c->params.nice_match_length, c->params.max_search_depth)];
+	u32 num_matches;
+	struct xpress_item *next_chosen_item;
+
+	mf = c->mf;
+
+	lz_mf_load_window(mf, c->cur_window, c->cur_window_size);
+
+	if (c->cur_window_size <= 8192)
+		len_3_too_far = 2048;
+	else
+		len_3_too_far = 4096;
+
+	memset(c->freqs, 0, sizeof(c->freqs));
+
+	window_ptr = c->cur_window;
+	window_end = c->cur_window + c->cur_window_size;
+	next_chosen_item = c->chosen_items;
+
+	do {
+		/* Get longest match at the current position.  */
+		num_matches = lz_mf_get_matches(mf, matches);
+
+		if (num_matches == 0 ||
+		    (matches[num_matches - 1].len == 3 &&
+		     matches[num_matches - 1].offset >= len_3_too_far))
+		{
+			*next_chosen_item++ = xpress_tally_literal(*window_ptr, c->freqs);
+			window_ptr += 1;
+		} else {
+			u32 len = matches[num_matches - 1].len;
+			u32 offset = matches[num_matches - 1].offset;
+
+			*next_chosen_item++ = xpress_tally_match(len, offset, c->freqs);
+			lz_mf_skip_positions(mf, len - 1);
+			window_ptr += len;
+		}
+	} while (window_ptr != window_end);
+
+	c->freqs[XPRESS_END_OF_DATA]++;
+	xpress_make_huffman_code(c);
+	return next_chosen_item - c->chosen_items;
+}
+
+/* Huffman-only parsing  */
+static u32
+xpress_choose_items_huffonly(struct xpress_compressor *c)
+{
+	const u8 *window_ptr;
+	const u8 *window_end;
+	struct xpress_item *next_chosen_item;
+
+	memset(c->freqs, 0, sizeof(c->freqs));
+
+	window_ptr = c->cur_window;
+	window_end = c->cur_window + c->cur_window_size;
+	next_chosen_item = c->chosen_items;
+
+	do {
+		*next_chosen_item++ = xpress_tally_literal(*window_ptr++, c->freqs);
+	} while (window_ptr != window_end);
+
+	c->freqs[XPRESS_END_OF_DATA]++;
+	xpress_make_huffman_code(c);
+	return next_chosen_item - c->chosen_items;
+}
+
 /* Given the specified compression level and maximum window size, build the
  * parameters to use for XPRESS compression.  */
 static void
@@ -835,15 +1018,13 @@ xpress_build_params(unsigned int compression_level, u32 max_window_size,
 
 		/* Huffman only (no Lempel-Ziv matches)  */
 		xpress_params->mf_algo = LZ_MF_NULL;
-		xpress_params->choose_item_func = xpress_choose_literal;
-		xpress_params->num_optim_passes = 1;
+		xpress_params->choose_items_func = xpress_choose_items_huffonly;
 
 	} else if (compression_level < 30) {
 
 		/* Greedy parsing  */
 		xpress_params->mf_algo = LZ_MF_HASH_CHAINS;
-		xpress_params->choose_item_func = xpress_choose_greedy_item;
-		xpress_params->num_optim_passes = 1;
+		xpress_params->choose_items_func = xpress_choose_items_greedy;
 		xpress_params->nice_match_length = compression_level;
 		xpress_params->max_search_depth = compression_level / 2;
 
@@ -851,15 +1032,14 @@ xpress_build_params(unsigned int compression_level, u32 max_window_size,
 
 		/* Lazy parsing  */
 		xpress_params->mf_algo = LZ_MF_HASH_CHAINS;
-		xpress_params->choose_item_func = xpress_choose_lazy_item;
-		xpress_params->num_optim_passes = 1;
+		xpress_params->choose_items_func = xpress_choose_items_lazy;
 		xpress_params->nice_match_length = compression_level;
 		xpress_params->max_search_depth = compression_level / 2;
 
 	} else {
 
 		/* Near-optimal parsing  */
-		xpress_params->choose_item_func = xpress_choose_near_optimal_item;
+		xpress_params->choose_items_func = xpress_choose_items_near_optimal;
 		if (max_window_size >= 32768)
 			xpress_params->mf_algo = LZ_MF_BINARY_TREES;
 		else
@@ -912,17 +1092,16 @@ xpress_get_needed_memory(size_t max_window_size, unsigned int compression_level)
 
 	size += lz_mf_get_needed_memory(params.mf_algo, max_window_size);
 
-	if (params.num_optim_passes > 1) {
-		size_t cache_len = max(max_window_size * XPRESS_CACHE_PER_POS,
-				       params.max_search_depth + 1);
-		size += cache_len * sizeof(struct lz_match);
-	} else {
-		size += params.max_search_depth * sizeof(struct lz_match);
-	}
-
-	if (params.choose_item_func == xpress_choose_near_optimal_item) {
+	if (params.choose_items_func == xpress_choose_items_near_optimal) {
 		size += (XPRESS_OPTIM_ARRAY_LENGTH + params.nice_match_length) *
 				      sizeof(struct xpress_mc_pos_data);
+		if (params.num_optim_passes > 1) {
+			size_t cache_len = max(max_window_size * XPRESS_CACHE_PER_POS,
+					       params.max_search_depth + 1);
+			size += cache_len * sizeof(struct lz_match);
+		} else {
+			size += params.max_search_depth * sizeof(struct lz_match);
+		}
 	}
 
 	size += max_window_size * sizeof(struct xpress_item);
@@ -954,27 +1133,26 @@ xpress_create_compressor(size_t max_window_size, unsigned int compression_level,
 	if (!c->mf)
 		goto oom;
 
-	if (params.num_optim_passes > 1) {
-		size_t cache_len = max(max_window_size * XPRESS_CACHE_PER_POS,
-				       params.max_search_depth + 1);
-		c->cached_matches = MALLOC(cache_len * sizeof(struct lz_match));
-		if (!c->cached_matches)
-			goto oom;
-		c->cache_limit = c->cached_matches + cache_len -
-				   (params.max_search_depth + 1);
-	} else {
-		c->cached_matches = MALLOC(params.max_search_depth *
-					   sizeof(struct lz_match));
-		if (!c->cached_matches)
-			goto oom;
-	}
-
-	if (params.choose_item_func == xpress_choose_near_optimal_item) {
+	if (params.choose_items_func == xpress_choose_items_near_optimal) {
 		c->optimum = MALLOC((XPRESS_OPTIM_ARRAY_LENGTH +
 				     params.nice_match_length) *
 				      sizeof(struct xpress_mc_pos_data));
 		if (!c->optimum)
 			goto oom;
+		if (params.num_optim_passes > 1) {
+			size_t cache_len = max(max_window_size * XPRESS_CACHE_PER_POS,
+					       params.max_search_depth + 1);
+			c->cached_matches = MALLOC(cache_len * sizeof(struct lz_match));
+			if (!c->cached_matches)
+				goto oom;
+			c->cache_limit = c->cached_matches + cache_len -
+					   (params.max_search_depth + 1);
+		} else {
+			c->cached_matches = MALLOC(params.max_search_depth *
+						   sizeof(struct lz_match));
+			if (!c->cached_matches)
+				goto oom;
+		}
 	}
 
 	c->chosen_items = MALLOC(max_window_size * sizeof(struct xpress_item));
@@ -996,25 +1174,19 @@ xpress_compress(const void *uncompressed_data, size_t uncompressed_size,
 	struct xpress_compressor *c = _c;
 	u32 num_chosen_items;
 	u8 *cptr;
-	struct output_bitstream ostream;
+	struct xpress_output_bitstream os;
 	u32 compressed_size;
 
 	/* XPRESS requires 256 bytes of overhead for the Huffman code, so it's
 	 * impossible to compress 256 bytes or less of data to less than the
-	 * input size.
-	 *
-	 * +1 to take into account that the buffer for compressed data is 1 byte
-	 * smaller than the buffer for uncompressed data.
-	 *
-	 * +4 to take into account that init_output_bitstream() requires at
-	 * least 4 bytes of data.  */
-	if (compressed_size_avail < XPRESS_NUM_SYMBOLS / 2 + 1 + 4)
+	 * input size.  */
+	if (compressed_size_avail < XPRESS_NUM_SYMBOLS / 2 + 50)
 		return 0;
 
 	/* Determine match/literal sequence to divide the data into.  */
 	c->cur_window = uncompressed_data;
 	c->cur_window_size = uncompressed_size;
-	num_chosen_items = xpress_choose_items(c);
+	num_chosen_items = (*c->params.choose_items_func)(c);
 
 	/* Output the Huffman code as a series of 512 4-bit lengths.  */
 	cptr = compressed_data;
@@ -1022,14 +1194,14 @@ xpress_compress(const void *uncompressed_data, size_t uncompressed_size,
 		*cptr++ = (c->lens[i] & 0xf) | (c->lens[i + 1] << 4);
 
 	/* Output the encoded matches/literals.  */
-	init_output_bitstream(&ostream, cptr,
-			      compressed_size_avail - XPRESS_NUM_SYMBOLS / 2 - 1);
-	xpress_write_items(&ostream, c->chosen_items, num_chosen_items,
+	xpress_init_output(&os, cptr,
+			   compressed_size_avail - XPRESS_NUM_SYMBOLS / 2);
+	xpress_write_items(&os, c->chosen_items, num_chosen_items,
 			   c->codewords, c->lens);
 
 	/* Flush any pending data and get the length of the compressed data.  */
-	compressed_size = flush_output_bitstream(&ostream);
-	if (compressed_size == (u32)~0UL)
+	compressed_size = xpress_flush_output(&os);
+	if (compressed_size == 0)
 		return 0;
 
 	/* Return the length of the compressed data.  */