wimlib.net Git - wimlib/blob - src/lzx-comp.c

   1 /*
   2  * lzx-comp.c
   3  *
   4  * LZX compression routines.
   5  *
   6  * This code was originally based on code written by Matthew T. Russotto
   7  *      (liblzxcomp).
   8  *
   9  * Copyright (C) 2002 Matthew T. Russotto
  10  * Copyright (C) 2012 Eric Biggers
  11  *
  12  * wimlib - Library for working with WIM files
  13  *
  14  * This library is free software; you can redistribute it and/or modify it under
  15  * the terms of the GNU Lesser General Public License as published by the Free
  16  * Software Foundation; either version 2.1 of the License, or (at your option) any
  17  * later version.
  18  *
  19  * This library is distributed in the hope that it will be useful, but WITHOUT ANY
  20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
  21  * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
  22  *
  23  * You should have received a copy of the GNU Lesser General Public License along
  24  * with this library; if not, write to the Free Software Foundation, Inc., 59
  25  * Temple Place, Suite 330, Boston, MA 02111-1307 USA
  26  */
  27
  28
  29
  30 /*
  31  * This file provides lzx_compress(), a function to compress an in-memory buffer
  32  * of data using LZX compression, as used in the WIM file format.
  33  *
  34  * There is no sliding window, as for the compressed chunks in WIM resources,
  35  * the window is always the length of the input.
  36  *
  37  * The basic algorithm should be familiar if you are familiar with Huffman trees
  38  * and with other LZ77-based formats such as DEFLATE.  Otherwise it can be quite
  39  * tricky to understand.  Basically it is the following:
  40  *
  41  * - Preprocess the input data (LZX-specific)
  42  * - Go through the input data and determine matches.  This part is based on
  43  *       code from zlib, and a hash table of 3-character strings is used to
  44  *       accelerate the process of finding matches.
  45  * - Build the Huffman trees based on the frequencies of symbols determined
  46  *       while recording matches.
  47  * - Output the block header, including the Huffman trees; then output the
  48  *       compressed stream of matches and literal characters.
  49  */
  50
  51
  52 #include "lzx.h"
  53 #include "comp.h"
  54 #include <math.h>
  55 #include <stdlib.h>
  56 #include <string.h>
  57
  58
  59 /* Structure to contain the Huffman codes for the main, length, and aligned
  60  * offset trees. */
  61 struct lzx_codes {
  62         u16 main_codewords[LZX_MAINTREE_NUM_SYMBOLS];
  63         u8  main_lens[LZX_MAINTREE_NUM_SYMBOLS];
  64
  65         u16 len_codewords[LZX_LENTREE_NUM_SYMBOLS];
  66         u8  len_lens[LZX_LENTREE_NUM_SYMBOLS];
  67
  68         u16 aligned_codewords[LZX_ALIGNEDTREE_NUM_SYMBOLS];
  69         u8  aligned_lens[LZX_ALIGNEDTREE_NUM_SYMBOLS];
  70 };
  71
  72 struct lzx_freq_tables {
  73         u32 main_freq_table[LZX_MAINTREE_NUM_SYMBOLS];
  74         u32 len_freq_table[LZX_LENTREE_NUM_SYMBOLS];
  75         u32 aligned_freq_table[LZX_ALIGNEDTREE_NUM_SYMBOLS];
  76 };
  77
  78
  79
  80
  81 /* Returns the position slot that corresponds to a given formatted offset.  This
  82  * means searching the lzx_position_base array to find what slot contains a
  83  * position base that is less than or equal to formatted_offset, where the next
  84  * slot contains a position base that is greater than or equal to
  85  * formatted_offset. */
  86 static uint lzx_get_position_slot(uint formatted_offset)
  87 {
  88         int left;
  89         int right;
  90         int mid;
  91
  92         /* Calculate position base using binary search of table; if log2 can be
  93          * done in hardware, approximation might work;
  94          * trunc(log2(formatted_offset*formatted_offset)) gets either the proper
  95          * position slot or the next one, except for slots 0, 1, and 39-49
  96          *
  97          * Slots 0-1 are handled by the R0-R1 procedures
  98          *
  99          * Slots 36-49 (formatted_offset >= 262144) can be found by
 100          * (formatted_offset/131072) + 34 == (formatted_offset >> 17) + 34;
 101          */
 102         if (formatted_offset >= 262144) {
 103                 return (formatted_offset >> 17) + 34;
 104         } else {
 105                 left = 3;
 106                 right = LZX_NUM_POSITION_SLOTS - 1;
 107                 while (1) {
 108                         mid = (left + right) >> 1;
 109                         if ((lzx_position_base[mid] <= formatted_offset) &&
 110                             lzx_position_base[mid + 1] > formatted_offset) {
 111                                 return mid;
 112                         }
 113                         if (formatted_offset > lzx_position_base[mid])
 114                                 /* too low */
 115                                 left = mid + 1;
 116                         else    /* too high */
 117                                 right = mid;
 118                 }
 119         }
 120 }
 121
 122 static u32 lzx_record_literal(u8 literal, void *__main_freq_tab)
 123 {
 124         u32 *main_freq_tab = __main_freq_tab;
 125         main_freq_tab[literal]++;
 126         return literal;
 127 }
 128
 129 /* Constructs a match from an offset and a length, and updates the LRU queue
 130  * and the frequency of symbols in the main, length, and aligned offset
 131  * alphabets.  The return value is a 32-bit integer that, if the high bit is
 132  * set, contains the match length, the position slot, and the position footer
 133  * for the match.  */
 134 static u32 lzx_record_match(uint match_offset, uint match_len,
 135                             void *__freq_tabs, void *__queue)
 136 {
 137         struct lzx_freq_tables *freq_tabs = __freq_tabs;
 138         struct lru_queue *queue = __queue;
 139         uint formatted_offset;
 140         uint position_slot;
 141         uint position_footer = 0;
 142         u32 match;
 143         u32 len_header;
 144         u32 len_pos_header;
 145         uint len_footer;
 146
 147         wimlib_assert(match_len >= LZX_MIN_MATCH && match_len <= LZX_MAX_MATCH);
 148
 149
 150         if (match_offset == queue->R0) {
 151                 formatted_offset = 0;
 152                 position_slot    = 0;
 153         } else if (match_offset == queue->R1) {
 154                 swap(queue->R0, queue->R1);
 155                 formatted_offset = 1;
 156                 position_slot    = 1;
 157         } else if (match_offset == queue->R2) {
 158                 swap(queue->R0, queue->R2);
 159                 formatted_offset = 2;
 160                 position_slot    = 2;
 161         } else {
 162                 /* Not a repeated offset. */
 163
 164                 formatted_offset = match_offset + LZX_MIN_MATCH;
 165
 166                 queue->R2 = queue->R1;
 167                 queue->R1 = queue->R0;
 168                 queue->R0 = match_offset;
 169
 170                 position_slot = lzx_get_position_slot(formatted_offset);
 171
 172                 /* Just the extra bits of the formatted offset. */
 173                 position_footer = ((1UL << lzx_extra_bits[position_slot]) - 1) &
 174                                                                 formatted_offset;
 175         }
 176
 177         /* (match length - 2) = 8 bits */
 178         /* position_slot = 6 bits */
 179         /* position_footer = 17 bits */
 180         /* total = 31 bits */
 181         /* plus one to say whether it's a literal or not */
 182
 183         match = 0x80000000 | /* bit 31 in intelligent bit ordering */
 184                 (position_slot << 25) | /* bits 30-25 */
 185                 (position_footer << 8) | /* bits 8-24 */
 186                 (match_len - LZX_MIN_MATCH); /* bits 0-7 */
 187
 188         /* Update the frequency for the main tree, the length tree (only if a
 189          * length symbol is to be output), and the aligned tree (only if an
 190          * aligned symbol is to be output.) */
 191         if (match_len < (LZX_NUM_PRIMARY_LENS + LZX_MIN_MATCH)) {
 192                 len_header = match_len - LZX_MIN_MATCH;
 193         } else {
 194                 len_header = LZX_NUM_PRIMARY_LENS;
 195                 len_footer = match_len - (LZX_NUM_PRIMARY_LENS + LZX_MIN_MATCH);
 196                 freq_tabs->len_freq_table[len_footer]++;
 197         }
 198         len_pos_header = (position_slot << 3) | len_header;
 199
 200         wimlib_assert(len_pos_header < LZX_MAINTREE_NUM_SYMBOLS - LZX_NUM_CHARS);
 201
 202         freq_tabs->main_freq_table[len_pos_header + LZX_NUM_CHARS]++;
 203
 204         if (lzx_extra_bits[position_slot] >= 3)
 205                 freq_tabs->aligned_freq_table[position_footer & 7]++;
 206
 207         return match;
 208 }
 209
 210 /*
 211  * Writes a compressed literal match to the output.
 212  *
 213  * @out:         The output bitstream.
 214  * @block_type:  The type of the block (LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM)
 215  * @match:       The match, encoded as a 32-bit number.
 216  * @codes:      Pointer to a structure that contains the codewords for the
 217  *                      main, length, and aligned offset Huffman codes.
 218  */
 219 static int lzx_write_match(struct output_bitstream *out, int block_type,
 220                                 u32 match, const struct lzx_codes *codes)
 221 {
 222         /* low 8 bits are the match length minus 2 */
 223         uint match_len_minus_2 = match & 0xff;
 224         /* Next 17 bits are the position footer */
 225         uint position_footer = (match >> 8) & 0x1ffff;  /* 17 bits */
 226         /* Next 6 bits are the position slot. */
 227         uint position_slot = (match >> 25) & 0x3f;      /* 6 bits */
 228         uint len_header;
 229         uint len_footer;
 230         uint len_pos_header;
 231         uint main_symbol;
 232         uint num_extra_bits;
 233         uint verbatim_bits;
 234         uint aligned_bits;
 235         int ret;
 236
 237         /* If the match length is less than MIN_MATCH (= 2) +
 238          * NUM_PRIMARY_LENS (= 7), the length header contains
 239          * the match length minus MIN_MATCH, and there is no
 240          * length footer.
 241          *
 242          * Otherwise, the length header contains
 243          * NUM_PRIMARY_LENS, and the length footer contains
 244          * the match length minus NUM_PRIMARY_LENS minus
 245          * MIN_MATCH. */
 246         if (match_len_minus_2 < LZX_NUM_PRIMARY_LENS) {
 247                 len_header = match_len_minus_2;
 248                 /* No length footer-- mark it with a special
 249                  * value. */
 250                 len_footer = (uint)(-1);
 251         } else {
 252                 len_header = LZX_NUM_PRIMARY_LENS;
 253                 len_footer = match_len_minus_2 - LZX_NUM_PRIMARY_LENS;
 254         }
 255
 256         /* Combine the position slot with the length header into
 257          * a single symbol that will be encoded with the main
 258          * tree. */
 259         len_pos_header = (position_slot << 3) | len_header;
 260
 261         /* The actual main symbol is offset by LZX_NUM_CHARS because
 262          * values under LZX_NUM_CHARS are used to indicate a literal
 263          * byte rather than a match. */
 264         main_symbol = len_pos_header + LZX_NUM_CHARS;
 265
 266         /* Output main symbol. */
 267         ret = bitstream_put_bits(out, codes->main_codewords[main_symbol],
 268                                  codes->main_lens[main_symbol]);
 269         if (ret != 0)
 270                 return ret;
 271
 272         /* If there is a length footer, output it using the
 273          * length Huffman code. */
 274         if (len_footer != (uint)(-1)) {
 275                 ret = bitstream_put_bits(out, codes->len_codewords[len_footer],
 276                                          codes->len_lens[len_footer]);
 277                 if (ret != 0)
 278                         return ret;
 279         }
 280
 281         wimlib_assert(position_slot < LZX_NUM_POSITION_SLOTS);
 282
 283         num_extra_bits = lzx_extra_bits[position_slot];
 284
 285         /* For aligned offset blocks with at least 3 extra bits, output the
 286          * verbatim bits literally, then the aligned bits encoded using the
 287          * aligned offset tree.  Otherwise, only the verbatim bits need to be
 288          * output. */
 289         if ((block_type == LZX_BLOCKTYPE_ALIGNED) && (num_extra_bits >= 3)) {
 290
 291                 verbatim_bits = position_footer >> 3;
 292                 ret = bitstream_put_bits(out, verbatim_bits,
 293                                          num_extra_bits - 3);
 294                 if (ret != 0)
 295                         return ret;
 296
 297                 aligned_bits = (position_footer & 7);
 298                 ret = bitstream_put_bits(out,
 299                                          codes->aligned_codewords[aligned_bits],
 300                                          codes->aligned_lens[aligned_bits]);
 301                 if (ret != 0)
 302                         return ret;
 303         } else {
 304                 /* verbatim bits is the same as the position
 305                  * footer, in this case. */
 306                 ret = bitstream_put_bits(out, position_footer, num_extra_bits);
 307                 if (ret != 0)
 308                         return ret;
 309         }
 310         return 0;
 311 }
 312
 313 /*
 314  * Writes all compressed literals in a block, both matches and literal bytes, to
 315  * the output bitstream.
 316  *
 317  * @out:         The output bitstream.
 318  * @block_type:  The type of the block (LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM)
 319  * @match_tab[]:   The array of matches that will be output.  It has length
 320  *                      of @num_compressed_literals.
 321  * @num_compressed_literals:  Number of compressed literals to be output.
 322  * @codes:      Pointer to a structure that contains the codewords for the
 323  *                      main, length, and aligned offset Huffman codes.
 324  */
 325 static int lzx_write_compressed_literals(struct output_bitstream *ostream,
 326                                          int block_type,
 327                                          const u32 match_tab[],
 328                                          uint  num_compressed_literals,
 329                                          const struct lzx_codes *codes)
 330 {
 331         uint i;
 332         u32 match;
 333         int ret;
 334
 335         for (i = 0; i < num_compressed_literals; i++) {
 336                 match = match_tab[i];
 337
 338                 /* High bit of the match indicates whether the match is an
 339                  * actual match (1) or a literal uncompressed byte (0) */
 340                 if (match & 0x80000000) {
 341                         /* match */
 342                         ret = lzx_write_match(ostream, block_type, match,
 343                                               codes);
 344                         if (ret != 0)
 345                                 return ret;
 346                 } else {
 347                         /* literal byte */
 348                         wimlib_assert(match < LZX_NUM_CHARS);
 349                         ret = bitstream_put_bits(ostream,
 350                                                  codes->main_codewords[match],
 351                                                  codes->main_lens[match]);
 352                         if (ret != 0)
 353                                 return ret;
 354                 }
 355         }
 356         return 0;
 357 }
 358
 359 /*
 360  * Writes a compressed Huffman tree to the output, preceded by the pretree for
 361  * it.
 362  *
 363  * The Huffman tree is represented in the output as a series of path lengths
 364  * from which the canonical Huffman code can be reconstructed.  The path lengths
 365  * themselves are compressed using a separate Huffman code, the pretree, which
 366  * consists of LZX_PRETREE_NUM_SYMBOLS (= 20) symbols that cover all possible code
 367  * lengths, plus extra codes for repeated lengths.  The path lengths of the
 368  * pretree precede the path lengths of the larger code and are uncompressed,
 369  * consisting of 20 entries of 4 bits each.
 370  *
 371  * @out:        The bitstream for the compressed output.
 372  * @lens:       The code lengths for the Huffman tree, indexed by symbol.
 373  * @num_symbols:        The number of symbols in the code.
 374  */
 375 static int lzx_write_compressed_tree(struct output_bitstream *out,
 376                                 const u8 lens[],
 377                                 uint num_symbols)
 378 {
 379         /* Frequencies of the length symbols, including the RLE symbols (NOT the
 380          * actual lengths themselves). */
 381         uint pretree_freqs[LZX_PRETREE_NUM_SYMBOLS];
 382         u8 pretree_lens[LZX_PRETREE_NUM_SYMBOLS];
 383         u16 pretree_codewords[LZX_PRETREE_NUM_SYMBOLS];
 384         u8 output_syms[num_symbols * 2];
 385         uint output_syms_idx;
 386         uint cur_run_len;
 387         uint i;
 388         uint len_in_run;
 389         uint additional_bits;
 390         char delta;
 391         u8 pretree_sym;
 392
 393         ZERO_ARRAY(pretree_freqs);
 394
 395         /* Since the code word lengths use a form of RLE encoding, the goal here
 396          * is to find each run of identical lengths when going through them in
 397          * symbol order (including runs of length 1).  For each run, as many
 398          * lengths are encoded using RLE as possible, and the rest are output
 399          * literally.
 400          *
 401          * output_syms[] will be filled in with the length symbols that will be
 402          * output, including RLE codes, not yet encoded using the pre-tree.
 403          *
 404          * cur_run_len keeps track of how many code word lengths are in the
 405          * current run of identical lengths.
 406          */
 407         output_syms_idx = 0;
 408         cur_run_len = 1;
 409         for (i = 1; i <= num_symbols; i++) {
 410
 411                 if (i != num_symbols && lens[i] == lens[i - 1]) {
 412                         /* Still in a run--- keep going. */
 413                         cur_run_len++;
 414                         continue;
 415                 }
 416
 417                 /* Run ended! Check if it is a run of zeroes or a run of
 418                  * nonzeroes. */
 419
 420                 /* The symbol that was repeated in the run--- not to be confused
 421                  * with the length *of* the run (cur_run_len) */
 422                 len_in_run = lens[i - 1];
 423
 424                 if (len_in_run == 0) {
 425                         /* A run of 0's.  Encode it in as few length
 426                          * codes as we can. */
 427
 428                         /* The magic length 18 indicates a run of 20 + n zeroes,
 429                          * where n is an uncompressed literal 5-bit integer that
 430                          * follows the magic length. */
 431                         while (cur_run_len >= 20) {
 432
 433                                 additional_bits = min(cur_run_len - 20, 0x1f);
 434                                 pretree_freqs[18]++;
 435                                 output_syms[output_syms_idx++] = 18;
 436                                 output_syms[output_syms_idx++] = additional_bits;
 437                                 cur_run_len -= 20 + additional_bits;
 438                         }
 439
 440                         /* The magic length 17 indicates a run of 4 + n zeroes,
 441                          * where n is an uncompressed literal 4-bit integer that
 442                          * follows the magic length. */
 443                         while (cur_run_len >= 4) {
 444                                 additional_bits = min(cur_run_len - 4, 0xf);
 445                                 pretree_freqs[17]++;
 446                                 output_syms[output_syms_idx++] = 17;
 447                                 output_syms[output_syms_idx++] = additional_bits;
 448                                 cur_run_len -= 4 + additional_bits;
 449                         }
 450
 451                 } else {
 452
 453                         /* A run of nonzero lengths. */
 454
 455                         /* The magic length 19 indicates a run of 4 + n
 456                          * nonzeroes, where n is a literal bit that follows the
 457                          * magic length, and where the value of the lengths in
 458                          * the run is given by an extra length symbol, encoded
 459                          * with the pretree, that follows the literal bit.
 460                          *
 461                          * The extra length symbol is encoded as a difference
 462                          * from the length of the codeword for the first symbol
 463                          * in the run in the previous tree.
 464                          * */
 465                         while (cur_run_len >= 4) {
 466                                 additional_bits = (cur_run_len > 4);
 467                                 delta = -(char)len_in_run;
 468                                 if (delta < 0)
 469                                         delta += 17;
 470                                 pretree_freqs[19]++;
 471                                 pretree_freqs[delta]++;
 472                                 output_syms[output_syms_idx++] = 19;
 473                                 output_syms[output_syms_idx++] = additional_bits;
 474                                 output_syms[output_syms_idx++] = delta;
 475                                 cur_run_len -= 4 + additional_bits;
 476                         }
 477                 }
 478
 479                 /* Any remaining lengths in the run are outputted without RLE,
 480                  * as a difference from the length of that codeword in the
 481                  * previous tree. */
 482                 while (cur_run_len--) {
 483                         delta = -(char)len_in_run;
 484                         if (delta < 0)
 485                                 delta += 17;
 486
 487                         pretree_freqs[delta]++;
 488                         output_syms[output_syms_idx++] = delta;
 489                 }
 490
 491                 cur_run_len = 1;
 492         }
 493
 494         wimlib_assert(output_syms_idx < ARRAY_LEN(output_syms));
 495
 496         /* Build the pretree from the frequencies of the length symbols. */
 497
 498         make_canonical_huffman_code(LZX_PRETREE_NUM_SYMBOLS,
 499                                     LZX_MAX_CODEWORD_LEN,
 500                                     pretree_freqs, pretree_lens,
 501                                     pretree_codewords);
 502
 503         /* Write the lengths of the pretree codes to the output. */
 504         for (i = 0; i < LZX_PRETREE_NUM_SYMBOLS; i++)
 505                 bitstream_put_bits(out, pretree_lens[i],
 506                                    LZX_PRETREE_ELEMENT_SIZE);
 507
 508         /* Write the length symbols, encoded with the pretree, to the output. */
 509
 510         i = 0;
 511         while (i < output_syms_idx) {
 512                 pretree_sym = output_syms[i++];
 513
 514                 bitstream_put_bits(out, pretree_codewords[pretree_sym],
 515                                    pretree_lens[pretree_sym]);
 516                 switch (pretree_sym) {
 517                 case 17:
 518                         bitstream_put_bits(out, output_syms[i++], 4);
 519                         break;
 520                 case 18:
 521                         bitstream_put_bits(out, output_syms[i++], 5);
 522                         break;
 523                 case 19:
 524                         bitstream_put_bits(out, output_syms[i++], 1);
 525                         bitstream_put_bits(out,
 526                                            pretree_codewords[output_syms[i]],
 527                                            pretree_lens[output_syms[i]]);
 528                         i++;
 529                         break;
 530                 default:
 531                         break;
 532                 }
 533         }
 534         return 0;
 535 }
 536
 537 /* Builds the canonical Huffman code for the main tree, the length tree, and the
 538  * aligned offset tree. */
 539 static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs,
 540                                 struct lzx_codes *codes)
 541 {
 542         make_canonical_huffman_code(LZX_MAINTREE_NUM_SYMBOLS,
 543                                         LZX_MAX_CODEWORD_LEN,
 544                                         freq_tabs->main_freq_table,
 545                                         codes->main_lens,
 546                                         codes->main_codewords);
 547
 548         make_canonical_huffman_code(LZX_LENTREE_NUM_SYMBOLS,
 549                                         LZX_MAX_CODEWORD_LEN,
 550                                         freq_tabs->len_freq_table,
 551                                         codes->len_lens,
 552                                         codes->len_codewords);
 553
 554         make_canonical_huffman_code(LZX_ALIGNEDTREE_NUM_SYMBOLS, 8,
 555                                         freq_tabs->aligned_freq_table,
 556                                         codes->aligned_lens,
 557                                         codes->aligned_codewords);
 558 }
 559
 560 /* Do the 'E8' preprocessing, where the targets of x86 CALL instructions were
 561  * changed from relative offsets to absolute offsets.  This type of
 562  * preprocessing can be used on any binary data even if it is not actually
 563  * machine code.  It seems to always be used in WIM files, even though there is
 564  * no bit to indicate that it actually is used, unlike in the LZX compressed
 565  * format as used in other file formats such as the cabinet format, where a bit
 566  * is reserved for that purpose. */
 567 static void do_call_insn_preprocessing(u8 uncompressed_data[],
 568                                                 uint uncompressed_data_len)
 569 {
 570         int i = 0;
 571         int file_size = LZX_MAGIC_FILESIZE;
 572         int32_t rel_offset;
 573         int32_t abs_offset;
 574
 575         /* Not enabled in the last 6 bytes, which means the 5-byte call
 576          * instruction cannot start in the last *10* bytes. */
 577         while (i < uncompressed_data_len - 10) {
 578                 if (uncompressed_data[i] != 0xe8) {
 579                         i++;
 580                         continue;
 581                 }
 582                 rel_offset = to_le32(*(int32_t*)(uncompressed_data + i + 1));
 583
 584                 if (rel_offset >= -i && rel_offset < file_size) {
 585                         if (rel_offset < file_size - i) {
 586                                 /* "good translation" */
 587                                 abs_offset = rel_offset + i;
 588                         } else {
 589                                 /* "compensating translation" */
 590                                 abs_offset = rel_offset - file_size;
 591                         }
 592                         *(int32_t*)(uncompressed_data + i + 1) = to_le32(abs_offset);
 593                 }
 594                 i += 5;
 595         }
 596 }
 597
 598
 599 static const struct lz_params lzx_lz_params = {
 600         .min_match      = 3,
 601         .max_match      = LZX_MAX_MATCH,
 602         .good_match     = LZX_MAX_MATCH,
 603         .nice_match     = LZX_MAX_MATCH,
 604         .max_chain_len  = LZX_MAX_MATCH,
 605         .max_lazy_match = LZX_MAX_MATCH,
 606         .too_far        = 4096,
 607 };
 608
 609 /*
 610  * Performs LZX compression on a block of data.
 611  *
 612  * @__uncompressed_data:  Pointer to the data to be compressed.
 613  * @uncompressed_len:     Length, in bytes, of the data to be compressed.
 614  * @compressed_data:      Pointer to a location at least (@uncompressed_len - 1)
 615  *                              bytes long into which the compressed data may be
 616  *                              written.
 617  * @compressed_len_ret:   A pointer to an unsigned int into which the length of
 618  *                              the compressed data may be returned.
 619  *
 620  * Returns zero if compression was successfully performed.  In that case
 621  * @compressed_data and @compressed_len_ret will contain the compressed data and
 622  * its length.  A return value of nonzero means that compressing the data did
 623  * not reduce its size, and @compressed_data will not contain the full
 624  * compressed data.
 625  */
 626 int lzx_compress(const void *__uncompressed_data, uint uncompressed_len,
 627                  void *compressed_data, uint *compressed_len_ret)
 628 {
 629         struct output_bitstream ostream;
 630         u8 uncompressed_data[uncompressed_len + LZX_MAX_MATCH];
 631         struct lzx_freq_tables freq_tabs;
 632         struct lzx_codes codes;
 633         u32 match_tab[uncompressed_len];
 634         struct lru_queue queue = {.R0 = 1, .R1 = 1, .R2 = 1};
 635         uint num_matches;
 636         uint compressed_len;
 637         uint i;
 638         int ret;
 639
 640         LZX_DEBUG("uncompressed_len = %u\n", uncompressed_len);
 641
 642         if (uncompressed_len < 100)
 643                 return 1;
 644
 645
 646         memset(&freq_tabs, 0, sizeof(freq_tabs));
 647
 648         /* The input data must be preprocessed. To avoid changing the original
 649          * input, copy it to a temporary buffer. */
 650         memcpy(uncompressed_data, __uncompressed_data, uncompressed_len);
 651
 652
 653         /* Before doing any actual compression, do the call instruction (0xe8
 654          * byte) translation on the uncompressed data. */
 655         do_call_insn_preprocessing(uncompressed_data, uncompressed_len);
 656
 657
 658         /* Determine the sequence of matches and literals that will be output,
 659          * and in the process, keep counts of the number of times each symbol
 660          * will be output, so that the Huffman trees can be made. */
 661
 662         num_matches = lz_analyze_block(uncompressed_data, uncompressed_len,
 663                                        match_tab, lzx_record_match,
 664                                        lzx_record_literal, &freq_tabs,
 665                                        &queue, freq_tabs.main_freq_table,
 666                                        &lzx_lz_params);
 667
 668         LZX_DEBUG("using %u matches\n", num_matches);
 669
 670
 671         lzx_make_huffman_codes(&freq_tabs, &codes);
 672
 673         /* Initialize the output bitstream. */
 674         init_output_bitstream(&ostream, compressed_data, uncompressed_len - 1);
 675
 676         /* The first three bits tell us what kind of block it is, and are one
 677          * of the LZX_BLOCKTYPE_* values.  */
 678         bitstream_put_bits(&ostream, LZX_BLOCKTYPE_ALIGNED, 3);
 679
 680         /* The next bit indicates whether the block size is the default (32768),
 681          * indicated by a 1 bit, or whether the block size is given by the next
 682          * 16 bits, indicated by a 0 bit. */
 683         if (uncompressed_len == 32768) {
 684                 bitstream_put_bits(&ostream, 1, 1);
 685         } else {
 686                 bitstream_put_bits(&ostream, 0, 1);
 687                 bitstream_put_bits(&ostream, uncompressed_len, 16);
 688         }
 689
 690         /* Write out the aligned offset tree. Note that M$ lies and says that
 691          * the aligned offset tree comes after the length tree, but that is
 692          * wrong; it actually is before the main tree.  */
 693         for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++)
 694                 bitstream_put_bits(&ostream, codes.aligned_lens[i],
 695                                    LZX_ALIGNEDTREE_ELEMENT_SIZE);
 696
 697         /* Write the pre-tree and lengths for the first LZX_NUM_CHARS symbols in the
 698          * main tree. */
 699         ret = lzx_write_compressed_tree(&ostream, codes.main_lens,
 700                                         LZX_NUM_CHARS);
 701         if (ret != 0)
 702                 return ret;
 703
 704         /* Write the pre-tree and symbols for the rest of the main tree. */
 705         ret = lzx_write_compressed_tree(&ostream, codes.main_lens +
 706                                         LZX_NUM_CHARS,
 707                                         LZX_MAINTREE_NUM_SYMBOLS -
 708                                                 LZX_NUM_CHARS);
 709         if (ret != 0)
 710                 return ret;
 711
 712         /* Write the pre-tree and symbols for the length tree. */
 713         ret = lzx_write_compressed_tree(&ostream, codes.len_lens,
 714                                         LZX_LENTREE_NUM_SYMBOLS);
 715         if (ret != 0)
 716                 return ret;
 717
 718         /* Write the compressed literals. */
 719         ret = lzx_write_compressed_literals(&ostream, LZX_BLOCKTYPE_ALIGNED,
 720                                             match_tab, num_matches, &codes);
 721         if (ret != 0)
 722                 return ret;
 723
 724         ret = flush_output_bitstream(&ostream);
 725         if (ret != 0)
 726                 return ret;
 727
 728         compressed_len = ostream.bit_output - (u8*)compressed_data;
 729
 730         LZX_DEBUG("Compressed %u => %u bytes\n",
 731                         uncompressed_len, compressed_len);
 732
 733         *compressed_len_ret = compressed_len;
 734
 735 #ifdef ENABLE_VERIFY_COMPRESSION
 736         /* Verify that we really get the same thing back when decompressing. */
 737         u8 buf[uncompressed_len];
 738         ret = lzx_decompress(compressed_data, compressed_len, buf,
 739                              uncompressed_len);
 740         if (ret != 0) {
 741                 ERROR("ERROR: Failed to decompress data we compressed!\n");
 742                 exit(0);
 743                 abort();
 744         }
 745
 746         for (i = 0; i < uncompressed_len; i++) {
 747                 if (buf[i] != *((u8*)__uncompressed_data + i)) {
 748                         ERROR("Data we compressed didn't decompress to "
 749                                 "the original data (difference at byte %u of "
 750                                 "%u)\n", i + 1, uncompressed_len);
 751                         abort();
 752                 }
 753         }
 754         LZX_DEBUG("Compression verified to be correct.\n");
 755 #endif
 756
 757         return 0;
 758 }