4 * XPRESS compression routines.
6 * See the comments in xpress-decompress.c about the XPRESS format.
10 * Copyright (C) 2012, 2013, 2014 Eric Biggers
12 * This file is part of wimlib, a library for working with WIM files.
14 * wimlib is free software; you can redistribute it and/or modify it under the
15 * terms of the GNU General Public License as published by the Free
16 * Software Foundation; either version 3 of the License, or (at your option)
19 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21 * A PARTICULAR PURPOSE. See the GNU General Public License for more
24 * You should have received a copy of the GNU General Public License
25 * along with wimlib; if not, see http://www.gnu.org/licenses/.
33 #include "wimlib/assert.h"
34 #include "wimlib/compressor_ops.h"
35 #include "wimlib/compress_common.h"
36 #include "wimlib/error.h"
37 #include "wimlib/lz_hash.h"
38 #include "wimlib/util.h"
39 #include "wimlib/xpress.h"
43 struct xpress_record_ctx {
44 u32 freqs[XPRESS_NUM_SYMBOLS];
45 struct xpress_item *chosen_items;
48 struct xpress_compressor {
51 struct xpress_item *chosen_items;
53 u32 codewords[XPRESS_NUM_SYMBOLS];
54 u8 lens[XPRESS_NUM_SYMBOLS];
55 struct xpress_record_ctx record_ctx;
58 /* Intermediate XPRESS match/literal representation. */
60 u16 adjusted_len; /* Match length minus XPRESS_MIN_MATCH_LEN */
61 u16 offset; /* Match offset */
62 /* For literals, offset == 0 and adjusted_len is the literal. */
66 * Writes @match, which is a match given in the intermediate representation for
67 * XPRESS matches, to the output stream @ostream.
69 * @codewords and @lens provide the Huffman code that is being used.
72 xpress_write_match(struct xpress_item match,
73 struct output_bitstream *restrict ostream,
74 const u32 codewords[restrict],
75 const u8 lens[restrict])
77 u8 len_hdr = min(match.adjusted_len, 0xf);
78 u8 offset_bsr = bsr32(match.offset);
79 unsigned sym = XPRESS_NUM_CHARS + ((offset_bsr << 4) | len_hdr);
81 bitstream_put_bits(ostream, codewords[sym], lens[sym]);
83 if (match.adjusted_len >= 0xf) {
84 u8 byte1 = min(match.adjusted_len - 0xf, 0xff);
85 bitstream_put_byte(ostream, byte1);
87 bitstream_put_byte(ostream, match.adjusted_len & 0xff);
88 bitstream_put_byte(ostream, match.adjusted_len >> 8);
91 bitstream_put_bits(ostream, match.offset ^ (1U << offset_bsr), offset_bsr);
95 xpress_write_items(struct output_bitstream *ostream,
96 const struct xpress_item items[restrict],
98 const u32 codewords[restrict],
99 const u8 lens[restrict])
101 for (u32 i = 0; i < num_items; i++) {
102 if (items[i].offset) {
104 xpress_write_match(items[i], ostream, codewords, lens);
107 u8 lit = items[i].adjusted_len;
108 bitstream_put_bits(ostream, codewords[lit], lens[lit]);
111 bitstream_put_bits(ostream, codewords[XPRESS_END_OF_DATA], lens[XPRESS_END_OF_DATA]);
115 xpress_record_literal(u8 lit, void *_ctx)
117 struct xpress_record_ctx *ctx = _ctx;
119 *(ctx->chosen_items++) =
120 (struct xpress_item) { .offset = 0, .adjusted_len = lit };
124 xpress_record_match(unsigned len, unsigned offset, void *_ctx)
126 struct xpress_record_ctx *ctx = _ctx;
128 XPRESS_ASSERT(len >= XPRESS_MIN_MATCH_LEN);
129 XPRESS_ASSERT(len <= XPRESS_MAX_MATCH_LEN);
130 XPRESS_ASSERT(offset >= XPRESS_MIN_OFFSET);
131 XPRESS_ASSERT(offset <= XPRESS_MAX_OFFSET);
133 unsigned adjusted_len = len - XPRESS_MIN_MATCH_LEN;
134 unsigned len_hdr = min(adjusted_len, 0xf);
135 unsigned sym = XPRESS_NUM_CHARS + ((bsr32(offset) << 4) | len_hdr);
137 XPRESS_ASSERT(sym >= XPRESS_NUM_CHARS);
138 XPRESS_ASSERT(sym < XPRESS_NUM_SYMBOLS);
141 *(ctx->chosen_items++) =
142 (struct xpress_item) { .offset = offset,
143 .adjusted_len = adjusted_len };
146 static const struct lz_params xpress_lz_params = {
147 .min_match = XPRESS_MIN_MATCH_LEN,
148 .max_match = XPRESS_MAX_MATCH_LEN,
149 .max_offset = XPRESS_MAX_OFFSET,
153 .max_lazy_match = 16,
158 xpress_compress(const void *uncompressed_data, size_t uncompressed_size,
159 void *compressed_data, size_t compressed_size_avail, void *_c)
161 struct xpress_compressor *c = _c;
162 u8 *cptr = compressed_data;
163 struct output_bitstream ostream;
164 u32 num_chosen_items;
166 size_t compressed_size;
168 /* XPRESS requires 256 bytes of overhead for the Huffman code, so it's
169 * impossible to compress 256 bytes or less of data to less than the
172 * +1 to take into account that the buffer for compressed data is 1 byte
173 * smaller than the buffer for uncompressed data.
175 * +4 to take into account that init_output_bitstream() requires at
176 * least 4 bytes of data. */
177 if (compressed_size_avail < XPRESS_NUM_SYMBOLS / 2 + 1 + 4)
180 /* Copy the data to a temporary buffer, but only to avoid
181 * inconsequential accesses of uninitialized memory in
182 * lz_analyze_block(). */
183 memcpy(c->window, uncompressed_data, uncompressed_size);
184 memset(c->window + uncompressed_size, 0, 8);
186 /* Determine match/literal sequence to divide the data into. */
187 memset(c->record_ctx.freqs, 0, sizeof(c->record_ctx.freqs));
188 c->record_ctx.chosen_items = c->chosen_items;
189 lz_analyze_block(c->window,
192 xpress_record_literal,
197 num_chosen_items = (c->record_ctx.chosen_items - c->chosen_items);
199 /* Account for end of data symbol. */
200 c->record_ctx.freqs[XPRESS_END_OF_DATA]++;
202 /* Build the Huffman code. */
203 make_canonical_huffman_code(XPRESS_NUM_SYMBOLS, XPRESS_MAX_CODEWORD_LEN,
204 c->record_ctx.freqs, c->lens, c->codewords);
206 /* Output the Huffman code as a series of 512 4-bit lengths. */
207 for (i = 0; i < XPRESS_NUM_SYMBOLS; i += 2)
208 *cptr++ = (c->lens[i] & 0xf) | (c->lens[i + 1] << 4);
210 /* Output the encoded matches/literals. */
211 init_output_bitstream(&ostream, cptr,
212 compressed_size_avail - XPRESS_NUM_SYMBOLS / 2 - 1);
214 xpress_write_items(&ostream, c->chosen_items, num_chosen_items,
215 c->codewords, c->lens);
217 /* Flush any pending data and get the length of the compressed data. */
218 compressed_size = flush_output_bitstream(&ostream);
219 if (compressed_size == (u32)~0UL)
222 compressed_size += XPRESS_NUM_SYMBOLS / 2;
224 #if defined(ENABLE_XPRESS_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION)
225 /* Verify that we really get the same thing back when decompressing. */
227 struct wimlib_decompressor *decompressor;
229 if (0 == wimlib_create_decompressor(WIMLIB_COMPRESSION_TYPE_XPRESS,
235 ret = wimlib_decompress(compressed_data,
240 wimlib_free_decompressor(decompressor);
243 ERROR("Failed to decompress data we "
244 "compressed using XPRESS algorithm");
248 if (memcmp(uncompressed_data, c->window,
251 ERROR("Data we compressed using XPRESS algorithm "
252 "didn't decompress to original");
257 WARNING("Failed to create decompressor for "
258 "data verification!");
263 return compressed_size;
267 xpress_free_compressor(void *_c)
269 struct xpress_compressor *c = _c;
273 FREE(c->chosen_items);
280 xpress_create_compressor(size_t max_window_size,
281 const struct wimlib_compressor_params_header *params,
284 struct xpress_compressor *c;
286 if (max_window_size == 0 || max_window_size > (1U << 26))
287 return WIMLIB_ERR_INVALID_PARAM;
289 c = CALLOC(1, sizeof(struct xpress_compressor));
293 c->window = MALLOC(max_window_size + 8);
294 if (c->window == NULL)
297 c->max_window_size = max_window_size;
299 c->chosen_items = MALLOC(max_window_size * sizeof(c->chosen_items[0]));
300 if (c->chosen_items == NULL)
303 c->prev_tab = MALLOC(max_window_size * sizeof(c->prev_tab[0]));
304 if (c->prev_tab == NULL)
311 xpress_free_compressor(c);
312 return WIMLIB_ERR_NOMEM;
316 xpress_get_needed_memory(size_t max_window_size,
317 const struct wimlib_compressor_params_header *params)
321 size += sizeof(struct xpress_compressor);
322 size += max_window_size + 8;
323 size += max_window_size * sizeof(((struct xpress_compressor*)0)->chosen_items[0]);
324 size += max_window_size * sizeof(((struct xpress_compressor*)0)->prev_tab[0]);
329 const struct compressor_ops xpress_compressor_ops = {
330 .get_needed_memory = xpress_get_needed_memory,
331 .create_compressor = xpress_create_compressor,
332 .compress = xpress_compress,
333 .free_compressor = xpress_free_compressor,