]> wimlib.net Git - wimlib/blob - src/lzx_decompress.c
Make lzx_offset_slot_base hold unadjusted offsets
[wimlib] / src / lzx_decompress.c
1 /*
2  * lzx_decompress.c
3  *
4  * A decompressor for the LZX compression format, as used in WIM files.
5  */
6
7 /*
8  * Copyright (C) 2012-2016 Eric Biggers
9  *
10  * This file is free software; you can redistribute it and/or modify it under
11  * the terms of the GNU Lesser General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option) any
13  * later version.
14  *
15  * This file is distributed in the hope that it will be useful, but WITHOUT
16  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with this file; if not, see http://www.gnu.org/licenses/.
22  */
23
24 /*
25  * LZX is an LZ77 and Huffman-code based compression format that has many
26  * similarities to DEFLATE (the format used by zlib/gzip).  The compression
27  * ratio is as good or better than DEFLATE.  See lzx_compress.c for a format
28  * overview, and see https://en.wikipedia.org/wiki/LZX_(algorithm) for a
29  * historical overview.  Here I make some pragmatic notes.
30  *
31  * The old specification for LZX is the document "Microsoft LZX Data Compression
32  * Format" (1997).  It defines the LZX format as used in cabinet files.  Allowed
33  * window sizes are 2^n where 15 <= n <= 21.  However, this document contains
34  * several errors, so don't read too much into it...
35  *
36  * The new specification for LZX is the document "[MS-PATCH]: LZX DELTA
37  * Compression and Decompression" (2014).  It defines the LZX format as used by
38  * Microsoft's binary patcher.  It corrects several errors in the 1997 document
39  * and extends the format in several ways --- namely, optional reference data,
40  * up to 2^25 byte windows, and longer match lengths.
41  *
42  * WIM files use a more restricted form of LZX.  No LZX DELTA extensions are
43  * present, the window is not "sliding", E8 preprocessing is done
44  * unconditionally with a fixed file size, and the maximum window size is always
45  * 2^15 bytes (equal to the size of each "chunk" in a compressed WIM resource).
46  * This code is primarily intended to implement this form of LZX.  But although
47  * not compatible with WIMGAPI, this code also supports maximum window sizes up
48  * to 2^21 bytes.
49  *
50  * TODO: Add support for window sizes up to 2^25 bytes.
51  */
52
53 #ifdef HAVE_CONFIG_H
54 #  include "config.h"
55 #endif
56
57 #include <string.h>
58
59 #include "wimlib/decompressor_ops.h"
60 #include "wimlib/decompress_common.h"
61 #include "wimlib/error.h"
62 #include "wimlib/lzx_common.h"
63 #include "wimlib/util.h"
64
65 /* These values are chosen for fast decompression.  */
66 #define LZX_MAINCODE_TABLEBITS          11
67 #define LZX_LENCODE_TABLEBITS           9
68 #define LZX_PRECODE_TABLEBITS           6
69 #define LZX_ALIGNEDCODE_TABLEBITS       7
70
71 #define LZX_READ_LENS_MAX_OVERRUN 50
72
73 /* Reusable heap-allocated memory for LZX decompression */
74 struct lzx_decompressor {
75
76         DECODE_TABLE(maincode_decode_table, LZX_MAINCODE_MAX_NUM_SYMBOLS,
77                      LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
78         u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
79
80         DECODE_TABLE(lencode_decode_table, LZX_LENCODE_NUM_SYMBOLS,
81                      LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
82         u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
83
84         union {
85                 DECODE_TABLE(alignedcode_decode_table, LZX_ALIGNEDCODE_NUM_SYMBOLS,
86                              LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
87                 u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
88         };
89
90         union {
91                 DECODE_TABLE(precode_decode_table, LZX_PRECODE_NUM_SYMBOLS,
92                              LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
93                 u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
94         };
95
96         unsigned window_order;
97         unsigned num_main_syms;
98 } _aligned_attribute(DECODE_TABLE_ALIGNMENT);
99
100 /* Read a Huffman-encoded symbol using the precode. */
101 static inline unsigned
102 read_presym(const struct lzx_decompressor *d, struct input_bitstream *is)
103 {
104         return read_huffsym(is, d->precode_decode_table,
105                             LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
106 }
107
108 /* Read a Huffman-encoded symbol using the main code. */
109 static inline unsigned
110 read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is)
111 {
112         return read_huffsym(is, d->maincode_decode_table,
113                             LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
114 }
115
116 /* Read a Huffman-encoded symbol using the length code. */
117 static inline unsigned
118 read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is)
119 {
120         return read_huffsym(is, d->lencode_decode_table,
121                             LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
122 }
123
124 /* Read a Huffman-encoded symbol using the aligned offset code. */
125 static inline unsigned
126 read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is)
127 {
128         return read_huffsym(is, d->alignedcode_decode_table,
129                             LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
130 }
131
132 /*
133  * Read a precode from the compressed input bitstream, then use it to decode
134  * @num_lens codeword length values and write them to @lens.
135  */
136 static int
137 lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is,
138                        u8 *lens, unsigned num_lens)
139 {
140         u8 *len_ptr = lens;
141         u8 *lens_end = lens + num_lens;
142
143         /* Read the lengths of the precode codewords, which are stored
144          * explicitly. */
145         for (int i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
146                 d->precode_lens[i] =
147                         bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
148         }
149
150         /* Build the decoding table for the precode. */
151         if (make_huffman_decode_table(d->precode_decode_table,
152                                       LZX_PRECODE_NUM_SYMBOLS,
153                                       LZX_PRECODE_TABLEBITS,
154                                       d->precode_lens,
155                                       LZX_MAX_PRE_CODEWORD_LEN))
156                 return -1;
157
158         /* Decode the codeword lengths. */
159         do {
160                 unsigned presym;
161                 u8 len;
162
163                 /* Read the next precode symbol. */
164                 presym = read_presym(d, is);
165                 if (presym < 17) {
166                         /* Difference from old length */
167                         len = *len_ptr - presym;
168                         if ((s8)len < 0)
169                                 len += 17;
170                         *len_ptr++ = len;
171                 } else {
172                         /* Special RLE values */
173
174                         unsigned run_len;
175
176                         if (presym == 17) {
177                                 /* Run of 0's */
178                                 run_len = 4 + bitstream_read_bits(is, 4);
179                                 len = 0;
180                         } else if (presym == 18) {
181                                 /* Longer run of 0's */
182                                 run_len = 20 + bitstream_read_bits(is, 5);
183                                 len = 0;
184                         } else {
185                                 /* Run of identical lengths */
186                                 run_len = 4 + bitstream_read_bits(is, 1);
187                                 presym = read_presym(d, is);
188                                 if (unlikely(presym > 17))
189                                         return -1;
190                                 len = *len_ptr - presym;
191                                 if ((s8)len < 0)
192                                         len += 17;
193                         }
194
195                         do {
196                                 *len_ptr++ = len;
197                         } while (--run_len);
198                         /*
199                          * The worst case overrun is when presym == 18,
200                          * run_len == 20 + 31, and only 1 length was remaining.
201                          * So LZX_READ_LENS_MAX_OVERRUN == 50.
202                          *
203                          * Overrun while reading the first half of maincode_lens
204                          * can corrupt the previous values in the second half.
205                          * This doesn't really matter because the resulting
206                          * lengths will still be in range, and data that
207                          * generates overruns is invalid anyway.
208                          */
209                 }
210         } while (len_ptr < lens_end);
211
212         return 0;
213 }
214
215 /*
216  * Read the header of an LZX block.  For all block types, the block type and
217  * size is saved in *block_type_ret and *block_size_ret, respectively.  For
218  * compressed blocks, the codeword lengths are also saved.  For uncompressed
219  * blocks, the recent offsets queue is also updated.
220  */
221 static int
222 lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is,
223                       u32 recent_offsets[], int *block_type_ret,
224                       u32 *block_size_ret)
225 {
226         int block_type;
227         u32 block_size;
228
229         bitstream_ensure_bits(is, 4);
230
231         /* Read the block type. */
232         block_type = bitstream_pop_bits(is, 3);
233
234         /* Read the block size. */
235         if (bitstream_pop_bits(is, 1)) {
236                 block_size = LZX_DEFAULT_BLOCK_SIZE;
237         } else {
238                 block_size = bitstream_read_bits(is, 16);
239                 if (d->window_order >= 16) {
240                         block_size <<= 8;
241                         block_size |= bitstream_read_bits(is, 8);
242                 }
243         }
244
245         switch (block_type) {
246
247         case LZX_BLOCKTYPE_ALIGNED:
248
249                 /* Read the aligned offset codeword lengths. */
250
251                 for (int i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
252                         d->alignedcode_lens[i] =
253                                 bitstream_read_bits(is,
254                                                     LZX_ALIGNEDCODE_ELEMENT_SIZE);
255                 }
256
257                 /* Fall though, since the rest of the header for aligned offset
258                  * blocks is the same as that for verbatim blocks.  */
259
260         case LZX_BLOCKTYPE_VERBATIM:
261
262                 /* Read the main codeword lengths, which are divided into two
263                  * parts: literal symbols and match headers. */
264
265                 if (lzx_read_codeword_lens(d, is, d->maincode_lens,
266                                            LZX_NUM_CHARS))
267                         return -1;
268
269                 if (lzx_read_codeword_lens(d, is, d->maincode_lens + LZX_NUM_CHARS,
270                                            d->num_main_syms - LZX_NUM_CHARS))
271                         return -1;
272
273
274                 /* Read the length codeword lengths. */
275
276                 if (lzx_read_codeword_lens(d, is, d->lencode_lens,
277                                            LZX_LENCODE_NUM_SYMBOLS))
278                         return -1;
279
280                 break;
281
282         case LZX_BLOCKTYPE_UNCOMPRESSED:
283                 /*
284                  * The header of an uncompressed block contains new values for
285                  * the recent offsets queue, starting on the next 16-bit
286                  * boundary in the bitstream.  Careful: if the stream is
287                  * *already* aligned, the correct thing to do is to throw away
288                  * the next 16 bits (this is probably a mistake in the format).
289                  */
290                 bitstream_ensure_bits(is, 1);
291                 bitstream_align(is);
292                 recent_offsets[0] = bitstream_read_u32(is);
293                 recent_offsets[1] = bitstream_read_u32(is);
294                 recent_offsets[2] = bitstream_read_u32(is);
295
296                 /* Offsets of 0 are invalid. */
297                 if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
298                     recent_offsets[2] == 0)
299                         return -1;
300                 break;
301
302         default:
303                 /* Unrecognized block type */
304                 return -1;
305         }
306
307         *block_type_ret = block_type;
308         *block_size_ret = block_size;
309         return 0;
310 }
311
312 /* Decompress a block of LZX-compressed data. */
313 static int
314 lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is,
315                      int block_type, u32 block_size,
316                      u8 * const out_begin, u8 *out_next, u32 recent_offsets[])
317 {
318         u8 * const block_end = out_next + block_size;
319         unsigned min_aligned_offset_slot = LZX_MAX_OFFSET_SLOTS;
320
321         /* Build the Huffman decode tables. */
322
323         if (block_type == LZX_BLOCKTYPE_ALIGNED) {
324                 if (make_huffman_decode_table(d->alignedcode_decode_table,
325                                               LZX_ALIGNEDCODE_NUM_SYMBOLS,
326                                               LZX_ALIGNEDCODE_TABLEBITS,
327                                               d->alignedcode_lens,
328                                               LZX_MAX_ALIGNED_CODEWORD_LEN))
329                         return -1;
330                 min_aligned_offset_slot = 8;
331         }
332
333         if (make_huffman_decode_table(d->lencode_decode_table,
334                                       LZX_LENCODE_NUM_SYMBOLS,
335                                       LZX_LENCODE_TABLEBITS,
336                                       d->lencode_lens,
337                                       LZX_MAX_LEN_CODEWORD_LEN))
338                 return -1;
339
340         if (make_huffman_decode_table(d->maincode_decode_table,
341                                       d->num_main_syms,
342                                       LZX_MAINCODE_TABLEBITS,
343                                       d->maincode_lens,
344                                       LZX_MAX_MAIN_CODEWORD_LEN))
345                 return -1;
346
347         /* Decode the literals and matches. */
348
349         do {
350                 unsigned mainsym;
351                 unsigned length;
352                 u32 offset;
353                 unsigned offset_slot;
354                 unsigned num_extra_bits;
355
356                 mainsym = read_mainsym(d, is);
357                 if (mainsym < LZX_NUM_CHARS) {
358                         /* Literal */
359                         *out_next++ = mainsym;
360                         continue;
361                 }
362
363                 /* Match */
364
365                 /* Decode the length header and offset slot.  */
366                 mainsym -= LZX_NUM_CHARS;
367                 length = mainsym % LZX_NUM_LEN_HEADERS;
368                 offset_slot = mainsym / LZX_NUM_LEN_HEADERS;
369
370                 /* If needed, read a length symbol to decode the full length. */
371                 if (length == LZX_NUM_PRIMARY_LENS)
372                         length += read_lensym(d, is);
373                 length += LZX_MIN_MATCH_LEN;
374
375                 if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
376                         /* Repeat offset  */
377
378                         /* Note: This isn't a real LRU queue, since using the R2
379                          * offset doesn't bump the R1 offset down to R2.  This
380                          * quirk allows all 3 recent offsets to be handled by
381                          * the same code.  (For R0, the swap is a no-op.)  */
382                         offset = recent_offsets[offset_slot];
383                         recent_offsets[offset_slot] = recent_offsets[0];
384                 } else {
385                         /* Explicit offset  */
386
387                         /* Look up the number of extra bits that need to be read
388                          * to decode offsets with this offset slot.  */
389                         num_extra_bits = lzx_extra_offset_bits[offset_slot];
390
391                         /* Start with the offset slot base value.  */
392                         offset = lzx_offset_slot_base[offset_slot];
393
394                         /* In aligned offset blocks, the low-order 3 bits of
395                          * each offset are encoded using the aligned offset
396                          * code.  Otherwise, all the extra bits are literal.  */
397
398                         if (offset_slot >= min_aligned_offset_slot) {
399                                 offset +=
400                                         bitstream_read_bits(is,
401                                                             num_extra_bits -
402                                                                 LZX_NUM_ALIGNED_OFFSET_BITS)
403                                                         << LZX_NUM_ALIGNED_OFFSET_BITS;
404                                 offset += read_alignedsym(d, is);
405                         } else {
406                                 offset += bitstream_read_bits(is, num_extra_bits);
407                         }
408
409                         /* Update the match offset LRU queue.  */
410                         STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
411                         recent_offsets[2] = recent_offsets[1];
412                         recent_offsets[1] = recent_offsets[0];
413                 }
414                 recent_offsets[0] = offset;
415
416                 /* Validate the match, then copy it to the current position.  */
417
418                 if (unlikely(length > block_end - out_next))
419                         return -1;
420
421                 if (unlikely(offset > out_next - out_begin))
422                         return -1;
423
424                 lz_copy(out_next, length, offset, block_end, LZX_MIN_MATCH_LEN);
425
426                 out_next += length;
427
428         } while (out_next != block_end);
429
430         return 0;
431 }
432
433 static int
434 lzx_decompress(const void *restrict compressed_data, size_t compressed_size,
435                void *restrict uncompressed_data, size_t uncompressed_size,
436                void *restrict _d)
437 {
438         struct lzx_decompressor *d = _d;
439         u8 * const out_begin = uncompressed_data;
440         u8 *out_next = out_begin;
441         u8 * const out_end = out_begin + uncompressed_size;
442         struct input_bitstream is;
443         STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
444         u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
445         unsigned may_have_e8_byte = 0;
446
447         init_input_bitstream(&is, compressed_data, compressed_size);
448
449         /* Codeword lengths begin as all 0's for delta encoding purposes. */
450         memset(d->maincode_lens, 0, d->num_main_syms);
451         memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
452
453         /* Decompress blocks until we have all the uncompressed data. */
454
455         while (out_next != out_end) {
456                 int block_type;
457                 u32 block_size;
458
459                 if (lzx_read_block_header(d, &is, recent_offsets,
460                                           &block_type, &block_size))
461                         return -1;
462
463                 if (block_size < 1 || block_size > out_end - out_next)
464                         return -1;
465
466                 if (likely(block_type != LZX_BLOCKTYPE_UNCOMPRESSED)) {
467
468                         /* Compressed block */
469                         if (lzx_decompress_block(d, &is, block_type, block_size,
470                                                  out_begin, out_next,
471                                                  recent_offsets))
472                                 return -1;
473
474                         /* If the first E8 byte was in this block, then it must
475                          * have been encoded as a literal using mainsym E8. */
476                         may_have_e8_byte |= d->maincode_lens[0xE8];
477                 } else {
478
479                         /* Uncompressed block */
480                         if (bitstream_read_bytes(&is, out_next, block_size))
481                                 return -1;
482
483                         /* Re-align the bitstream if needed. */
484                         if (block_size & 1)
485                                 bitstream_read_byte(&is);
486
487                         /* There may have been an E8 byte in the block. */
488                         may_have_e8_byte = 1;
489                 }
490                 out_next += block_size;
491         }
492
493         /* Postprocess the data unless it cannot possibly contain E8 bytes. */
494         if (may_have_e8_byte)
495                 lzx_postprocess(uncompressed_data, uncompressed_size);
496
497         return 0;
498 }
499
500 static void
501 lzx_free_decompressor(void *_d)
502 {
503         ALIGNED_FREE(_d);
504 }
505
506 static int
507 lzx_create_decompressor(size_t max_block_size, void **d_ret)
508 {
509         unsigned window_order;
510         struct lzx_decompressor *d;
511
512         window_order = lzx_get_window_order(max_block_size);
513         if (window_order == 0)
514                 return WIMLIB_ERR_INVALID_PARAM;
515
516         d = ALIGNED_MALLOC(sizeof(*d), DECODE_TABLE_ALIGNMENT);
517         if (!d)
518                 return WIMLIB_ERR_NOMEM;
519
520         d->window_order = window_order;
521         d->num_main_syms = lzx_get_num_main_syms(window_order);
522
523         *d_ret = d;
524         return 0;
525 }
526
527 const struct decompressor_ops lzx_decompressor_ops = {
528         .create_decompressor = lzx_create_decompressor,
529         .decompress          = lzx_decompress,
530         .free_decompressor   = lzx_free_decompressor,
531 };