wimlib.net Git - wimlib/blob - src/lzx_decompress.c

   1 /*
   2  * lzx_decompress.c
   3  *
   4  * A decompressor for the LZX compression format, as used in WIM files.
   5  */
   6
   7 /*
   8  * Copyright (C) 2012-2016 Eric Biggers
   9  *
  10  * This file is free software; you can redistribute it and/or modify it under
  11  * the terms of the GNU Lesser General Public License as published by the Free
  12  * Software Foundation; either version 3 of the License, or (at your option) any
  13  * later version.
  14  *
  15  * This file is distributed in the hope that it will be useful, but WITHOUT
  16  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  17  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  18  * details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public License
  21  * along with this file; if not, see http://www.gnu.org/licenses/.
  22  */
  23
  24 /*
  25  * LZX is an LZ77 and Huffman-code based compression format that has many
  26  * similarities to DEFLATE (the format used by zlib/gzip).  The compression
  27  * ratio is as good or better than DEFLATE.  See lzx_compress.c for a format
  28  * overview, and see https://en.wikipedia.org/wiki/LZX_(algorithm) for a
  29  * historical overview.  Here I make some pragmatic notes.
  30  *
  31  * The old specification for LZX is the document "Microsoft LZX Data Compression
  32  * Format" (1997).  It defines the LZX format as used in cabinet files.  Allowed
  33  * window sizes are 2^n where 15 <= n <= 21.  However, this document contains
  34  * several errors, so don't read too much into it...
  35  *
  36  * The new specification for LZX is the document "[MS-PATCH]: LZX DELTA
  37  * Compression and Decompression" (2014).  It defines the LZX format as used by
  38  * Microsoft's binary patcher.  It corrects several errors in the 1997 document
  39  * and extends the format in several ways --- namely, optional reference data,
  40  * up to 2^25 byte windows, and longer match lengths.
  41  *
  42  * WIM files use a more restricted form of LZX.  No LZX DELTA extensions are
  43  * present, the window is not "sliding", E8 preprocessing is done
  44  * unconditionally with a fixed file size, and the maximum window size is always
  45  * 2^15 bytes (equal to the size of each "chunk" in a compressed WIM resource).
  46  * This code is primarily intended to implement this form of LZX.  But although
  47  * not compatible with WIMGAPI, this code also supports maximum window sizes up
  48  * to 2^21 bytes.
  49  *
  50  * TODO: Add support for window sizes up to 2^25 bytes.
  51  */
  52
  53 #ifdef HAVE_CONFIG_H
  54 #  include "config.h"
  55 #endif
  56
  57 #include <string.h>
  58
  59 #include "wimlib/decompressor_ops.h"
  60 #include "wimlib/decompress_common.h"
  61 #include "wimlib/error.h"
  62 #include "wimlib/lzx_common.h"
  63 #include "wimlib/util.h"
  64
  65 /* These values are chosen for fast decompression.  */
  66 #define LZX_MAINCODE_TABLEBITS          11
  67 #define LZX_LENCODE_TABLEBITS           10
  68 #define LZX_PRECODE_TABLEBITS           6
  69 #define LZX_ALIGNEDCODE_TABLEBITS       7
  70
  71 #define LZX_READ_LENS_MAX_OVERRUN 50
  72
  73 struct lzx_decompressor {
  74
  75         u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) +
  76                                         (LZX_MAINCODE_MAX_NUM_SYMBOLS * 2)]
  77                                         _aligned_attribute(DECODE_TABLE_ALIGNMENT);
  78         u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
  79
  80
  81         u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) +
  82                                         (LZX_LENCODE_NUM_SYMBOLS * 2)]
  83                                         _aligned_attribute(DECODE_TABLE_ALIGNMENT);
  84         u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
  85
  86         union {
  87                 u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) +
  88                                                 (LZX_ALIGNEDCODE_NUM_SYMBOLS * 2)]
  89                                                 _aligned_attribute(DECODE_TABLE_ALIGNMENT);
  90                 u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
  91         };
  92
  93         union {
  94                 u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) +
  95                                          (LZX_PRECODE_NUM_SYMBOLS * 2)]
  96                                                 _aligned_attribute(DECODE_TABLE_ALIGNMENT);
  97                 u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
  98                 u8 extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
  99         };
 100
 101         unsigned window_order;
 102         unsigned num_main_syms;
 103
 104         /* Like lzx_extra_offset_bits[], but does not include the entropy-coded
 105          * bits of aligned offset blocks */
 106         u8 extra_offset_bits_minus_aligned[LZX_MAX_OFFSET_SLOTS];
 107
 108 } _aligned_attribute(DECODE_TABLE_ALIGNMENT);
 109
 110 /* Read a Huffman-encoded symbol using the precode. */
 111 static inline unsigned
 112 read_presym(const struct lzx_decompressor *d, struct input_bitstream *is)
 113 {
 114         return read_huffsym(is, d->precode_decode_table,
 115                             LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
 116 }
 117
 118 /* Read a Huffman-encoded symbol using the main code. */
 119 static inline unsigned
 120 read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is)
 121 {
 122         return read_huffsym(is, d->maincode_decode_table,
 123                             LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
 124 }
 125
 126 /* Read a Huffman-encoded symbol using the length code. */
 127 static inline unsigned
 128 read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is)
 129 {
 130         return read_huffsym(is, d->lencode_decode_table,
 131                             LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
 132 }
 133
 134 /* Read a Huffman-encoded symbol using the aligned offset code. */
 135 static inline unsigned
 136 read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is)
 137 {
 138         return read_huffsym(is, d->alignedcode_decode_table,
 139                             LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
 140 }
 141
 142 /*
 143  * Read a precode from the compressed input bitstream, then use it to decode
 144  * @num_lens codeword length values and write them to @lens.
 145  */
 146 static int
 147 lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is,
 148                        u8 *lens, unsigned num_lens)
 149 {
 150         u8 *len_ptr = lens;
 151         u8 *lens_end = lens + num_lens;
 152
 153         /* Read the lengths of the precode codewords.  These are stored
 154          * explicitly. */
 155         for (int i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
 156                 d->precode_lens[i] =
 157                         bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
 158         }
 159
 160         /* Build the decoding table for the precode. */
 161         if (make_huffman_decode_table(d->precode_decode_table,
 162                                       LZX_PRECODE_NUM_SYMBOLS,
 163                                       LZX_PRECODE_TABLEBITS,
 164                                       d->precode_lens,
 165                                       LZX_MAX_PRE_CODEWORD_LEN))
 166                 return -1;
 167
 168         /* Decode the codeword lengths.  */
 169         do {
 170                 unsigned presym;
 171                 u8 len;
 172
 173                 /* Read the next precode symbol.  */
 174                 presym = read_presym(d, is);
 175                 if (presym < 17) {
 176                         /* Difference from old length  */
 177                         len = *len_ptr - presym;
 178                         if ((s8)len < 0)
 179                                 len += 17;
 180                         *len_ptr++ = len;
 181                 } else {
 182                         /* Special RLE values  */
 183
 184                         unsigned run_len;
 185
 186                         if (presym == 17) {
 187                                 /* Run of 0's  */
 188                                 run_len = 4 + bitstream_read_bits(is, 4);
 189                                 len = 0;
 190                         } else if (presym == 18) {
 191                                 /* Longer run of 0's  */
 192                                 run_len = 20 + bitstream_read_bits(is, 5);
 193                                 len = 0;
 194                         } else {
 195                                 /* Run of identical lengths  */
 196                                 run_len = 4 + bitstream_read_bits(is, 1);
 197                                 presym = read_presym(d, is);
 198                                 if (unlikely(presym > 17))
 199                                         return -1;
 200                                 len = *len_ptr - presym;
 201                                 if ((s8)len < 0)
 202                                         len += 17;
 203                         }
 204
 205                         do {
 206                                 *len_ptr++ = len;
 207                         } while (--run_len);
 208                         /*
 209                          * The worst case overrun is when presym == 18,
 210                          * run_len == 20 + 31, and only 1 length was remaining.
 211                          * So LZX_READ_LENS_MAX_OVERRUN == 50.
 212                          *
 213                          * Overrun while reading the first half of maincode_lens
 214                          * can corrupt the previous values in the second half.
 215                          * This doesn't really matter because the resulting
 216                          * lengths will still be in range, and data that
 217                          * generates overruns is invalid anyway.
 218                          */
 219                 }
 220         } while (len_ptr < lens_end);
 221
 222         return 0;
 223 }
 224
 225 /*
 226  * Read the header of an LZX block.  For all block types, the block type and
 227  * size is saved in *block_type_ret and *block_size_ret, respectively.  For
 228  * compressed blocks, the codeword lengths are also saved.  For uncompressed
 229  * blocks, the recent offsets queue is also updated.
 230  */
 231 static int
 232 lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is,
 233                       u32 recent_offsets[], int *block_type_ret,
 234                       u32 *block_size_ret)
 235 {
 236         int block_type;
 237         u32 block_size;
 238
 239         bitstream_ensure_bits(is, 4);
 240
 241         /* Read the block type. */
 242         block_type = bitstream_pop_bits(is, 3);
 243
 244         /* Read the block size. */
 245         if (bitstream_pop_bits(is, 1)) {
 246                 block_size = LZX_DEFAULT_BLOCK_SIZE;
 247         } else {
 248                 block_size = bitstream_read_bits(is, 16);
 249                 if (d->window_order >= 16) {
 250                         block_size <<= 8;
 251                         block_size |= bitstream_read_bits(is, 8);
 252                 }
 253         }
 254
 255         switch (block_type) {
 256
 257         case LZX_BLOCKTYPE_ALIGNED:
 258
 259                 /* Read the aligned offset codeword lengths. */
 260
 261                 for (int i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
 262                         d->alignedcode_lens[i] =
 263                                 bitstream_read_bits(is,
 264                                                     LZX_ALIGNEDCODE_ELEMENT_SIZE);
 265                 }
 266
 267                 /* Fall though, since the rest of the header for aligned offset
 268                  * blocks is the same as that for verbatim blocks.  */
 269
 270         case LZX_BLOCKTYPE_VERBATIM:
 271
 272                 /* Read the main codeword lengths, which are divided into two
 273                  * parts: literal symbols and match headers. */
 274
 275                 if (lzx_read_codeword_lens(d, is, d->maincode_lens,
 276                                            LZX_NUM_CHARS))
 277                         return -1;
 278
 279                 if (lzx_read_codeword_lens(d, is, d->maincode_lens + LZX_NUM_CHARS,
 280                                            d->num_main_syms - LZX_NUM_CHARS))
 281                         return -1;
 282
 283
 284                 /* Read the length codeword lengths. */
 285
 286                 if (lzx_read_codeword_lens(d, is, d->lencode_lens,
 287                                            LZX_LENCODE_NUM_SYMBOLS))
 288                         return -1;
 289
 290                 break;
 291
 292         case LZX_BLOCKTYPE_UNCOMPRESSED:
 293                 /*
 294                  * The header of an uncompressed block contains new values for
 295                  * the recent offsets queue, starting on the next 16-bit
 296                  * boundary in the bitstream.  Careful: if the stream is
 297                  * *already* aligned, the correct thing to do is to throw away
 298                  * the next 16 bits (this is probably a mistake in the format).
 299                  */
 300                 bitstream_ensure_bits(is, 1);
 301                 bitstream_align(is);
 302                 recent_offsets[0] = bitstream_read_u32(is);
 303                 recent_offsets[1] = bitstream_read_u32(is);
 304                 recent_offsets[2] = bitstream_read_u32(is);
 305
 306                 /* Offsets of 0 are invalid.  */
 307                 if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
 308                     recent_offsets[2] == 0)
 309                         return -1;
 310                 break;
 311
 312         default:
 313                 /* Unrecognized block type.  */
 314                 return -1;
 315         }
 316
 317         *block_type_ret = block_type;
 318         *block_size_ret = block_size;
 319         return 0;
 320 }
 321
 322 /* Decompress a block of LZX-compressed data. */
 323 static int
 324 lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is,
 325                      int block_type, u32 block_size,
 326                      u8 * const out_begin, u8 *out_next, u32 recent_offsets[])
 327 {
 328         u8 * const block_end = out_next + block_size;
 329         unsigned min_aligned_offset_slot;
 330
 331         /*
 332          * Build the Huffman decode tables.  We always need to build the main
 333          * and length decode tables.  For aligned blocks we additionally need to
 334          * build the aligned offset decode table.
 335          */
 336
 337         if (make_huffman_decode_table(d->maincode_decode_table,
 338                                       d->num_main_syms,
 339                                       LZX_MAINCODE_TABLEBITS,
 340                                       d->maincode_lens,
 341                                       LZX_MAX_MAIN_CODEWORD_LEN))
 342                 return -1;
 343
 344         if (make_huffman_decode_table(d->lencode_decode_table,
 345                                       LZX_LENCODE_NUM_SYMBOLS,
 346                                       LZX_LENCODE_TABLEBITS,
 347                                       d->lencode_lens,
 348                                       LZX_MAX_LEN_CODEWORD_LEN))
 349                 return -1;
 350
 351         if (block_type == LZX_BLOCKTYPE_ALIGNED) {
 352                 if (make_huffman_decode_table(d->alignedcode_decode_table,
 353                                               LZX_ALIGNEDCODE_NUM_SYMBOLS,
 354                                               LZX_ALIGNEDCODE_TABLEBITS,
 355                                               d->alignedcode_lens,
 356                                               LZX_MAX_ALIGNED_CODEWORD_LEN))
 357                         return -1;
 358                 min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
 359                 memcpy(d->extra_offset_bits, d->extra_offset_bits_minus_aligned,
 360                        sizeof(lzx_extra_offset_bits));
 361         } else {
 362                 min_aligned_offset_slot = LZX_MAX_OFFSET_SLOTS;
 363                 memcpy(d->extra_offset_bits, lzx_extra_offset_bits,
 364                        sizeof(lzx_extra_offset_bits));
 365         }
 366
 367         /* Decode the literals and matches. */
 368
 369         do {
 370                 unsigned mainsym;
 371                 unsigned length;
 372                 u32 offset;
 373                 unsigned offset_slot;
 374
 375                 mainsym = read_mainsym(d, is);
 376                 if (mainsym < LZX_NUM_CHARS) {
 377                         /* Literal */
 378                         *out_next++ = mainsym;
 379                         continue;
 380                 }
 381
 382                 /* Match */
 383
 384                 /* Decode the length header and offset slot.  */
 385                 STATIC_ASSERT(LZX_NUM_CHARS % LZX_NUM_LEN_HEADERS == 0);
 386                 length = mainsym % LZX_NUM_LEN_HEADERS;
 387                 offset_slot = (mainsym - LZX_NUM_CHARS) / LZX_NUM_LEN_HEADERS;
 388
 389                 /* If needed, read a length symbol to decode the full length. */
 390                 if (length == LZX_NUM_PRIMARY_LENS)
 391                         length += read_lensym(d, is);
 392                 length += LZX_MIN_MATCH_LEN;
 393
 394                 if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
 395                         /* Repeat offset  */
 396
 397                         /* Note: This isn't a real LRU queue, since using the R2
 398                          * offset doesn't bump the R1 offset down to R2. */
 399                         offset = recent_offsets[offset_slot];
 400                         recent_offsets[offset_slot] = recent_offsets[0];
 401                 } else {
 402                         /* Explicit offset  */
 403                         offset = bitstream_read_bits(is, d->extra_offset_bits[offset_slot]);
 404                         if (offset_slot >= min_aligned_offset_slot) {
 405                                 offset = (offset << LZX_NUM_ALIGNED_OFFSET_BITS) |
 406                                          read_alignedsym(d, is);
 407                         }
 408                         offset += lzx_offset_slot_base[offset_slot];
 409
 410                         /* Update the match offset LRU queue.  */
 411                         STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
 412                         recent_offsets[2] = recent_offsets[1];
 413                         recent_offsets[1] = recent_offsets[0];
 414                 }
 415                 recent_offsets[0] = offset;
 416
 417                 /* Validate the match, then copy it to the current position.  */
 418
 419                 if (unlikely(length > block_end - out_next))
 420                         return -1;
 421
 422                 if (unlikely(offset > out_next - out_begin))
 423                         return -1;
 424
 425                 lz_copy(out_next, length, offset, block_end, LZX_MIN_MATCH_LEN);
 426
 427                 out_next += length;
 428
 429         } while (out_next != block_end);
 430
 431         return 0;
 432 }
 433
 434 static int
 435 lzx_decompress(const void *restrict compressed_data, size_t compressed_size,
 436                void *restrict uncompressed_data, size_t uncompressed_size,
 437                void *restrict _d)
 438 {
 439         struct lzx_decompressor *d = _d;
 440         u8 * const out_begin = uncompressed_data;
 441         u8 *out_next = out_begin;
 442         u8 * const out_end = out_begin + uncompressed_size;
 443         struct input_bitstream is;
 444         STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
 445         u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
 446         unsigned may_have_e8_byte = 0;
 447
 448         init_input_bitstream(&is, compressed_data, compressed_size);
 449
 450         /* Codeword lengths begin as all 0's for delta encoding purposes. */
 451         memset(d->maincode_lens, 0, d->num_main_syms);
 452         memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
 453
 454         /* Decompress blocks until we have all the uncompressed data. */
 455
 456         while (out_next != out_end) {
 457                 int block_type;
 458                 u32 block_size;
 459
 460                 if (lzx_read_block_header(d, &is, recent_offsets,
 461                                           &block_type, &block_size))
 462                         return -1;
 463
 464                 if (block_size < 1 || block_size > out_end - out_next)
 465                         return -1;
 466
 467                 if (likely(block_type != LZX_BLOCKTYPE_UNCOMPRESSED)) {
 468
 469                         /* Compressed block */
 470                         if (lzx_decompress_block(d, &is, block_type, block_size,
 471                                                  out_begin, out_next,
 472                                                  recent_offsets))
 473                                 return -1;
 474
 475                         /* If the first E8 byte was in this block, then it must
 476                          * have been encoded as a literal using mainsym E8. */
 477                         may_have_e8_byte |= d->maincode_lens[0xE8];
 478                 } else {
 479
 480                         /* Uncompressed block */
 481                         if (bitstream_read_bytes(&is, out_next, block_size))
 482                                 return -1;
 483
 484                         /* Re-align the bitstream if needed. */
 485                         if (block_size & 1)
 486                                 bitstream_read_byte(&is);
 487
 488                         /* There may have been an E8 byte in the block. */
 489                         may_have_e8_byte = 1;
 490                 }
 491                 out_next += block_size;
 492         }
 493
 494         /* Postprocess the data unless it cannot possibly contain E8 bytes. */
 495         if (may_have_e8_byte)
 496                 lzx_postprocess(uncompressed_data, uncompressed_size);
 497
 498         return 0;
 499 }
 500
 501 static int
 502 lzx_create_decompressor(size_t max_block_size, void **d_ret)
 503 {
 504         unsigned window_order;
 505         struct lzx_decompressor *d;
 506
 507         window_order = lzx_get_window_order(max_block_size);
 508         if (window_order == 0)
 509                 return WIMLIB_ERR_INVALID_PARAM;
 510
 511         d = ALIGNED_MALLOC(sizeof(*d), DECODE_TABLE_ALIGNMENT);
 512         if (!d)
 513                 return WIMLIB_ERR_NOMEM;
 514
 515         d->window_order = window_order;
 516         d->num_main_syms = lzx_get_num_main_syms(window_order);
 517
 518         /* Initialize 'd->extra_offset_bits_minus_aligned'. */
 519         STATIC_ASSERT(sizeof(d->extra_offset_bits_minus_aligned) ==
 520                       sizeof(lzx_extra_offset_bits));
 521         STATIC_ASSERT(sizeof(d->extra_offset_bits) ==
 522                       sizeof(lzx_extra_offset_bits));
 523         memcpy(d->extra_offset_bits_minus_aligned, lzx_extra_offset_bits,
 524                sizeof(lzx_extra_offset_bits));
 525         for (unsigned offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
 526              offset_slot < LZX_MAX_OFFSET_SLOTS; offset_slot++)
 527         {
 528                 d->extra_offset_bits_minus_aligned[offset_slot] -=
 529                                 LZX_NUM_ALIGNED_OFFSET_BITS;
 530         }
 531
 532         *d_ret = d;
 533         return 0;
 534 }
 535
 536 static void
 537 lzx_free_decompressor(void *_d)
 538 {
 539         ALIGNED_FREE(_d);
 540 }
 541
 542 const struct decompressor_ops lzx_decompressor_ops = {
 543         .create_decompressor = lzx_create_decompressor,
 544         .decompress          = lzx_decompress,
 545         .free_decompressor   = lzx_free_decompressor,
 546 };