wimlib.net Git - wimlib/blob - src/lzx-compress.c

   1 /*
   2  * lzx-compress.c
   3  *
   4  * LZX compression routines, originally based on code written by Matthew T.
   5  * Russotto (liblzxcomp), but heavily modified.
   6  */
   7
   8 /*
   9  * Copyright (C) 2002 Matthew T. Russotto
  10  * Copyright (C) 2012, 2013 Eric Biggers
  11  *
  12  * This file is part of wimlib, a library for working with WIM files.
  13  *
  14  * wimlib is free software; you can redistribute it and/or modify it under the
  15  * terms of the GNU General Public License as published by the Free
  16  * Software Foundation; either version 3 of the License, or (at your option)
  17  * any later version.
  18  *
  19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
  20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  21  * A PARTICULAR PURPOSE. See the GNU General Public License for more
  22  * details.
  23  *
  24  * You should have received a copy of the GNU General Public License
  25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
  26  */
  27
  28
  29 /*
  30  * This file provides wimlib_lzx_compress(), a function to compress an in-memory
  31  * buffer of data using LZX compression, as used in the WIM file format.
  32  *
  33  * Please see the comments in lzx-decompress.c for more information about this
  34  * compression format.
  35  *
  36  * One thing to keep in mind is that there is no sliding window, since the
  37  * window is always the entirety of a WIM chunk, which is at most WIM_CHUNK_SIZE
  38  * ( = 32768) bytes.
  39  *
  40  * The basic compression algorithm used here should be familiar if you are
  41  * familiar with Huffman trees and with other LZ77 and Huffman-based formats
  42  * such as DEFLATE.  Otherwise it can be quite tricky to understand.  Basically
  43  * it is the following:
  44  *
  45  * - Preprocess the input data (LZX-specific)
  46  * - Go through the input data and determine matches.  This part is based on
  47  *       code from zlib, and a hash table of 3-character strings is used to
  48  *       accelerate the process of finding matches.
  49  * - Build the Huffman trees based on the frequencies of symbols determined
  50  *       while recording matches.
  51  * - Output the block header, including the Huffman trees; then output the
  52  *       compressed stream of matches and literal characters.
  53  *
  54  * It is possible for a WIM chunk to include multiple LZX blocks, since for some
  55  * input data this will produce a better compression ratio (especially since
  56  * each block can include new Huffman codes).  However, producing multiple LZX
  57  * blocks from one input chunk is not yet implemented.
  58  */
  59
  60 #ifdef HAVE_CONFIG_H
  61 #  include "config.h"
  62 #endif
  63
  64 #include "wimlib.h"
  65 #include "wimlib/compress.h"
  66 #include "wimlib/lzx.h"
  67 #include "wimlib/util.h"
  68
  69 #include <stdlib.h>
  70 #include <string.h>
  71
  72
  73 /* Structure to contain the Huffman codes for the main, length, and aligned
  74  * offset trees. */
  75 struct lzx_codes {
  76         u16 main_codewords[LZX_MAINTREE_NUM_SYMBOLS];
  77         u8  main_lens[LZX_MAINTREE_NUM_SYMBOLS];
  78
  79         u16 len_codewords[LZX_LENTREE_NUM_SYMBOLS];
  80         u8  len_lens[LZX_LENTREE_NUM_SYMBOLS];
  81
  82         u16 aligned_codewords[LZX_ALIGNEDTREE_NUM_SYMBOLS];
  83         u8  aligned_lens[LZX_ALIGNEDTREE_NUM_SYMBOLS];
  84 };
  85
  86 struct lzx_freq_tables {
  87         freq_t main_freq_table[LZX_MAINTREE_NUM_SYMBOLS];
  88         freq_t len_freq_table[LZX_LENTREE_NUM_SYMBOLS];
  89         freq_t aligned_freq_table[LZX_ALIGNEDTREE_NUM_SYMBOLS];
  90 };
  91
  92 /* Returns the LZX position slot that corresponds to a given formatted offset.
  93  *
  94  * Logically, this returns the smallest i such that
  95  * formatted_offset >= lzx_position_base[i].
  96  *
  97  * The actual implementation below takes advantage of the regularity of the
  98  * numbers in the lzx_position_base array to calculate the slot directly from
  99  * the formatted offset without actually looking at the array.
 100  */
 101 static inline unsigned
 102 lzx_get_position_slot(unsigned formatted_offset)
 103 {
 104 #if 0
 105         /*
 106          * Slots 36-49 (formatted_offset >= 262144) can be found by
 107          * (formatted_offset/131072) + 34 == (formatted_offset >> 17) + 34;
 108          * however, this check for formatted_offset >= 262144 is commented out
 109          * because WIM chunks cannot be that large.
 110          */
 111         if (formatted_offset >= 262144) {
 112                 return (formatted_offset >> 17) + 34;
 113         } else
 114 #endif
 115         {
 116                 /* Note: this part here only works if:
 117                  *
 118                  *    2 <= formatted_offset < 655360
 119                  *
 120                  * It is < 655360 because the frequency of the position bases
 121                  * increases starting at the 655360 entry, and it is >= 2
 122                  * because the below calculation fails if the most significant
 123                  * bit is lower than the 2's place. */
 124                 wimlib_assert(formatted_offset >= 2 && formatted_offset < 655360);
 125                 unsigned mssb_idx = bsr32(formatted_offset);
 126                 return (mssb_idx << 1) |
 127                         ((formatted_offset >> (mssb_idx - 1)) & 1);
 128         }
 129 }
 130
 131 static u32
 132 lzx_record_literal(u8 literal, void *__main_freq_tab)
 133 {
 134         freq_t *main_freq_tab = __main_freq_tab;
 135         main_freq_tab[literal]++;
 136         return literal;
 137 }
 138
 139 /* Constructs a match from an offset and a length, and updates the LRU queue and
 140  * the frequency of symbols in the main, length, and aligned offset alphabets.
 141  * The return value is a 32-bit number that provides the match in an
 142  * intermediate representation documented below. */
 143 static u32
 144 lzx_record_match(unsigned match_offset, unsigned match_len,
 145                  void *__freq_tabs, void *__queue)
 146 {
 147         struct lzx_freq_tables *freq_tabs = __freq_tabs;
 148         struct lru_queue *queue = __queue;
 149         unsigned position_slot;
 150         unsigned position_footer = 0;
 151         u32 match;
 152         u32 len_header;
 153         u32 len_pos_header;
 154         unsigned len_footer;
 155         unsigned adjusted_match_len;
 156
 157         wimlib_assert(match_len >= LZX_MIN_MATCH && match_len <= LZX_MAX_MATCH);
 158         wimlib_assert(match_offset != 0);
 159
 160         /* If possible, encode this offset as a repeated offset. */
 161         if (match_offset == queue->R0) {
 162                 position_slot = 0;
 163         } else if (match_offset == queue->R1) {
 164                 swap(queue->R0, queue->R1);
 165                 position_slot = 1;
 166         } else if (match_offset == queue->R2) {
 167                 swap(queue->R0, queue->R2);
 168                 position_slot = 2;
 169         } else {
 170                 /* Not a repeated offset. */
 171
 172                 /* offsets of 0, 1, and 2 are reserved for the repeated offset
 173                  * codes, so non-repeated offsets must be encoded as 3+.  The
 174                  * minimum offset is 1, so encode the offsets offset by 2. */
 175                 unsigned formatted_offset = match_offset + LZX_MIN_MATCH;
 176
 177                 queue->R2 = queue->R1;
 178                 queue->R1 = queue->R0;
 179                 queue->R0 = match_offset;
 180
 181                 /* The (now-formatted) offset will actually be encoded as a
 182                  * small position slot number that maps to a certain hard-coded
 183                  * offset (position base), followed by a number of extra bits---
 184                  * the position footer--- that are added to the position base to
 185                  * get the original formatted offset. */
 186
 187                 position_slot = lzx_get_position_slot(formatted_offset);
 188                 position_footer = formatted_offset &
 189                                   ((1 << lzx_get_num_extra_bits(position_slot)) - 1);
 190         }
 191
 192         adjusted_match_len = match_len - LZX_MIN_MATCH;
 193
 194         /* Pack the position slot, position footer, and match length into an
 195          * intermediate representation.
 196          *
 197          * bits    description
 198          * ----    -----------------------------------------------------------
 199          *
 200          * 31      1 if a match, 0 if a literal.
 201          *
 202          * 30-25   position slot.  This can be at most 50, so it will fit in 6
 203          *         bits.
 204          *
 205          * 8-24    position footer.  This is the offset of the real formatted
 206          *         offset from the position base.  This can be at most 17 bits
 207          *         (since lzx_extra_bits[LZX_NUM_POSITION_SLOTS - 1] is 17).
 208          *
 209          * 0-7     length of match, offset by 2.  This can be at most
 210          *         (LZX_MAX_MATCH - 2) == 255, so it will fit in 8 bits.  */
 211         match = 0x80000000 |
 212                 (position_slot << 25) |
 213                 (position_footer << 8) |
 214                 (adjusted_match_len);
 215
 216         /* The match length must be at least 2, so let the adjusted match length
 217          * be the match length minus 2.
 218          *
 219          * If it is less than 7, the adjusted match length is encoded as a 3-bit
 220          * number offset by 2.  Otherwise, the 3-bit length header is all 1's
 221          * and the actual adjusted length is given as a symbol encoded with the
 222          * length tree, offset by 7.
 223          */
 224         if (adjusted_match_len < LZX_NUM_PRIMARY_LENS) {
 225                 len_header = adjusted_match_len;
 226         } else {
 227                 len_header = LZX_NUM_PRIMARY_LENS;
 228                 len_footer = adjusted_match_len - LZX_NUM_PRIMARY_LENS;
 229                 freq_tabs->len_freq_table[len_footer]++;
 230         }
 231         len_pos_header = (position_slot << 3) | len_header;
 232
 233         wimlib_assert(len_pos_header < LZX_MAINTREE_NUM_SYMBOLS - LZX_NUM_CHARS);
 234
 235         freq_tabs->main_freq_table[len_pos_header + LZX_NUM_CHARS]++;
 236
 237         /* Equivalent to:
 238          * if (lzx_extra_bits[position_slot] >= 3) */
 239         if (position_slot >= 8)
 240                 freq_tabs->aligned_freq_table[position_footer & 7]++;
 241
 242         return match;
 243 }
 244
 245 /*
 246  * Writes a compressed literal match to the output.
 247  *
 248  * @out:         The output bitstream.
 249  * @block_type:  The type of the block (LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM)
 250  * @match:       The match, encoded as a 32-bit number.
 251  * @codes:      Pointer to a structure that contains the codewords for the
 252  *                      main, length, and aligned offset Huffman codes.
 253  */
 254 static int
 255 lzx_write_match(struct output_bitstream *out, int block_type,
 256                 u32 match, const struct lzx_codes *codes)
 257 {
 258         /* low 8 bits are the match length minus 2 */
 259         unsigned match_len_minus_2 = match & 0xff;
 260         /* Next 17 bits are the position footer */
 261         unsigned position_footer = (match >> 8) & 0x1ffff;      /* 17 bits */
 262         /* Next 6 bits are the position slot. */
 263         unsigned position_slot = (match >> 25) & 0x3f;  /* 6 bits */
 264         unsigned len_header;
 265         unsigned len_footer;
 266         unsigned len_pos_header;
 267         unsigned main_symbol;
 268         unsigned num_extra_bits;
 269         unsigned verbatim_bits;
 270         unsigned aligned_bits;
 271         int ret;
 272
 273         /* If the match length is less than MIN_MATCH (= 2) +
 274          * NUM_PRIMARY_LENS (= 7), the length header contains
 275          * the match length minus MIN_MATCH, and there is no
 276          * length footer.
 277          *
 278          * Otherwise, the length header contains
 279          * NUM_PRIMARY_LENS, and the length footer contains
 280          * the match length minus NUM_PRIMARY_LENS minus
 281          * MIN_MATCH. */
 282         if (match_len_minus_2 < LZX_NUM_PRIMARY_LENS) {
 283                 len_header = match_len_minus_2;
 284                 /* No length footer-- mark it with a special
 285                  * value. */
 286                 len_footer = (unsigned)(-1);
 287         } else {
 288                 len_header = LZX_NUM_PRIMARY_LENS;
 289                 len_footer = match_len_minus_2 - LZX_NUM_PRIMARY_LENS;
 290         }
 291
 292         /* Combine the position slot with the length header into
 293          * a single symbol that will be encoded with the main
 294          * tree. */
 295         len_pos_header = (position_slot << 3) | len_header;
 296
 297         /* The actual main symbol is offset by LZX_NUM_CHARS because
 298          * values under LZX_NUM_CHARS are used to indicate a literal
 299          * byte rather than a match. */
 300         main_symbol = len_pos_header + LZX_NUM_CHARS;
 301
 302         /* Output main symbol. */
 303         ret = bitstream_put_bits(out, codes->main_codewords[main_symbol],
 304                                  codes->main_lens[main_symbol]);
 305         if (ret != 0)
 306                 return ret;
 307
 308         /* If there is a length footer, output it using the
 309          * length Huffman code. */
 310         if (len_footer != (unsigned)(-1)) {
 311                 ret = bitstream_put_bits(out, codes->len_codewords[len_footer],
 312                                          codes->len_lens[len_footer]);
 313                 if (ret != 0)
 314                         return ret;
 315         }
 316
 317         wimlib_assert(position_slot < LZX_NUM_POSITION_SLOTS);
 318
 319         num_extra_bits = lzx_get_num_extra_bits(position_slot);
 320
 321         /* For aligned offset blocks with at least 3 extra bits, output the
 322          * verbatim bits literally, then the aligned bits encoded using the
 323          * aligned offset tree.  Otherwise, only the verbatim bits need to be
 324          * output. */
 325         if ((block_type == LZX_BLOCKTYPE_ALIGNED) && (num_extra_bits >= 3)) {
 326
 327                 verbatim_bits = position_footer >> 3;
 328                 ret = bitstream_put_bits(out, verbatim_bits,
 329                                          num_extra_bits - 3);
 330                 if (ret != 0)
 331                         return ret;
 332
 333                 aligned_bits = (position_footer & 7);
 334                 ret = bitstream_put_bits(out,
 335                                          codes->aligned_codewords[aligned_bits],
 336                                          codes->aligned_lens[aligned_bits]);
 337                 if (ret != 0)
 338                         return ret;
 339         } else {
 340                 /* verbatim bits is the same as the position
 341                  * footer, in this case. */
 342                 ret = bitstream_put_bits(out, position_footer, num_extra_bits);
 343                 if (ret != 0)
 344                         return ret;
 345         }
 346         return 0;
 347 }
 348
 349 /*
 350  * Writes all compressed literals in a block, both matches and literal bytes, to
 351  * the output bitstream.
 352  *
 353  * @out:         The output bitstream.
 354  * @block_type:  The type of the block (LZX_BLOCKTYPE_ALIGNED or LZX_BLOCKTYPE_VERBATIM)
 355  * @match_tab[]:   The array of matches that will be output.  It has length
 356  *                      of @num_compressed_literals.
 357  * @num_compressed_literals:  Number of compressed literals to be output.
 358  * @codes:      Pointer to a structure that contains the codewords for the
 359  *                      main, length, and aligned offset Huffman codes.
 360  */
 361 static int
 362 lzx_write_compressed_literals(struct output_bitstream *ostream,
 363                               int block_type,
 364                               const u32 match_tab[],
 365                               unsigned  num_compressed_literals,
 366                               const struct lzx_codes *codes)
 367 {
 368         unsigned i;
 369         u32 match;
 370         int ret;
 371
 372         for (i = 0; i < num_compressed_literals; i++) {
 373                 match = match_tab[i];
 374
 375                 /* High bit of the match indicates whether the match is an
 376                  * actual match (1) or a literal uncompressed byte (0) */
 377                 if (match & 0x80000000) {
 378                         /* match */
 379                         ret = lzx_write_match(ostream, block_type, match,
 380                                               codes);
 381                         if (ret != 0)
 382                                 return ret;
 383                 } else {
 384                         /* literal byte */
 385                         wimlib_assert(match < LZX_NUM_CHARS);
 386                         ret = bitstream_put_bits(ostream,
 387                                                  codes->main_codewords[match],
 388                                                  codes->main_lens[match]);
 389                         if (ret != 0)
 390                                 return ret;
 391                 }
 392         }
 393         return 0;
 394 }
 395
 396 /*
 397  * Writes a compressed Huffman tree to the output, preceded by the pretree for
 398  * it.
 399  *
 400  * The Huffman tree is represented in the output as a series of path lengths
 401  * from which the canonical Huffman code can be reconstructed.  The path lengths
 402  * themselves are compressed using a separate Huffman code, the pretree, which
 403  * consists of LZX_PRETREE_NUM_SYMBOLS (= 20) symbols that cover all possible code
 404  * lengths, plus extra codes for repeated lengths.  The path lengths of the
 405  * pretree precede the path lengths of the larger code and are uncompressed,
 406  * consisting of 20 entries of 4 bits each.
 407  *
 408  * @out:        The bitstream for the compressed output.
 409  * @lens:       The code lengths for the Huffman tree, indexed by symbol.
 410  * @num_symbols:        The number of symbols in the code.
 411  */
 412 static int
 413 lzx_write_compressed_tree(struct output_bitstream *out,
 414                           const u8 lens[], unsigned num_symbols)
 415 {
 416         /* Frequencies of the length symbols, including the RLE symbols (NOT the
 417          * actual lengths themselves). */
 418         freq_t pretree_freqs[LZX_PRETREE_NUM_SYMBOLS];
 419         u8 pretree_lens[LZX_PRETREE_NUM_SYMBOLS];
 420         u16 pretree_codewords[LZX_PRETREE_NUM_SYMBOLS];
 421         u8 output_syms[num_symbols * 2];
 422         unsigned output_syms_idx;
 423         unsigned cur_run_len;
 424         unsigned i;
 425         unsigned len_in_run;
 426         unsigned additional_bits;
 427         char delta;
 428         u8 pretree_sym;
 429
 430         ZERO_ARRAY(pretree_freqs);
 431
 432         /* Since the code word lengths use a form of RLE encoding, the goal here
 433          * is to find each run of identical lengths when going through them in
 434          * symbol order (including runs of length 1).  For each run, as many
 435          * lengths are encoded using RLE as possible, and the rest are output
 436          * literally.
 437          *
 438          * output_syms[] will be filled in with the length symbols that will be
 439          * output, including RLE codes, not yet encoded using the pre-tree.
 440          *
 441          * cur_run_len keeps track of how many code word lengths are in the
 442          * current run of identical lengths.
 443          */
 444         output_syms_idx = 0;
 445         cur_run_len = 1;
 446         for (i = 1; i <= num_symbols; i++) {
 447
 448                 if (i != num_symbols && lens[i] == lens[i - 1]) {
 449                         /* Still in a run--- keep going. */
 450                         cur_run_len++;
 451                         continue;
 452                 }
 453
 454                 /* Run ended! Check if it is a run of zeroes or a run of
 455                  * nonzeroes. */
 456
 457                 /* The symbol that was repeated in the run--- not to be confused
 458                  * with the length *of* the run (cur_run_len) */
 459                 len_in_run = lens[i - 1];
 460
 461                 if (len_in_run == 0) {
 462                         /* A run of 0's.  Encode it in as few length
 463                          * codes as we can. */
 464
 465                         /* The magic length 18 indicates a run of 20 + n zeroes,
 466                          * where n is an uncompressed literal 5-bit integer that
 467                          * follows the magic length. */
 468                         while (cur_run_len >= 20) {
 469
 470                                 additional_bits = min(cur_run_len - 20, 0x1f);
 471                                 pretree_freqs[18]++;
 472                                 output_syms[output_syms_idx++] = 18;
 473                                 output_syms[output_syms_idx++] = additional_bits;
 474                                 cur_run_len -= 20 + additional_bits;
 475                         }
 476
 477                         /* The magic length 17 indicates a run of 4 + n zeroes,
 478                          * where n is an uncompressed literal 4-bit integer that
 479                          * follows the magic length. */
 480                         while (cur_run_len >= 4) {
 481                                 additional_bits = min(cur_run_len - 4, 0xf);
 482                                 pretree_freqs[17]++;
 483                                 output_syms[output_syms_idx++] = 17;
 484                                 output_syms[output_syms_idx++] = additional_bits;
 485                                 cur_run_len -= 4 + additional_bits;
 486                         }
 487
 488                 } else {
 489
 490                         /* A run of nonzero lengths. */
 491
 492                         /* The magic length 19 indicates a run of 4 + n
 493                          * nonzeroes, where n is a literal bit that follows the
 494                          * magic length, and where the value of the lengths in
 495                          * the run is given by an extra length symbol, encoded
 496                          * with the pretree, that follows the literal bit.
 497                          *
 498                          * The extra length symbol is encoded as a difference
 499                          * from the length of the codeword for the first symbol
 500                          * in the run in the previous tree.
 501                          * */
 502                         while (cur_run_len >= 4) {
 503                                 additional_bits = (cur_run_len > 4);
 504                                 delta = -(char)len_in_run;
 505                                 if (delta < 0)
 506                                         delta += 17;
 507                                 pretree_freqs[19]++;
 508                                 pretree_freqs[(unsigned char)delta]++;
 509                                 output_syms[output_syms_idx++] = 19;
 510                                 output_syms[output_syms_idx++] = additional_bits;
 511                                 output_syms[output_syms_idx++] = delta;
 512                                 cur_run_len -= 4 + additional_bits;
 513                         }
 514                 }
 515
 516                 /* Any remaining lengths in the run are outputted without RLE,
 517                  * as a difference from the length of that codeword in the
 518                  * previous tree. */
 519                 while (cur_run_len--) {
 520                         delta = -(char)len_in_run;
 521                         if (delta < 0)
 522                                 delta += 17;
 523
 524                         pretree_freqs[(unsigned char)delta]++;
 525                         output_syms[output_syms_idx++] = delta;
 526                 }
 527
 528                 cur_run_len = 1;
 529         }
 530
 531         wimlib_assert(output_syms_idx < ARRAY_LEN(output_syms));
 532
 533         /* Build the pretree from the frequencies of the length symbols. */
 534
 535         make_canonical_huffman_code(LZX_PRETREE_NUM_SYMBOLS,
 536                                     LZX_MAX_CODEWORD_LEN,
 537                                     pretree_freqs, pretree_lens,
 538                                     pretree_codewords);
 539
 540         /* Write the lengths of the pretree codes to the output. */
 541         for (i = 0; i < LZX_PRETREE_NUM_SYMBOLS; i++)
 542                 bitstream_put_bits(out, pretree_lens[i],
 543                                    LZX_PRETREE_ELEMENT_SIZE);
 544
 545         /* Write the length symbols, encoded with the pretree, to the output. */
 546
 547         i = 0;
 548         while (i < output_syms_idx) {
 549                 pretree_sym = output_syms[i++];
 550
 551                 bitstream_put_bits(out, pretree_codewords[pretree_sym],
 552                                    pretree_lens[pretree_sym]);
 553                 switch (pretree_sym) {
 554                 case 17:
 555                         bitstream_put_bits(out, output_syms[i++], 4);
 556                         break;
 557                 case 18:
 558                         bitstream_put_bits(out, output_syms[i++], 5);
 559                         break;
 560                 case 19:
 561                         bitstream_put_bits(out, output_syms[i++], 1);
 562                         bitstream_put_bits(out,
 563                                            pretree_codewords[output_syms[i]],
 564                                            pretree_lens[output_syms[i]]);
 565                         i++;
 566                         break;
 567                 default:
 568                         break;
 569                 }
 570         }
 571         return 0;
 572 }
 573
 574 /* Builds the canonical Huffman code for the main tree, the length tree, and the
 575  * aligned offset tree. */
 576 static void
 577 lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs,
 578                        struct lzx_codes *codes)
 579 {
 580         make_canonical_huffman_code(LZX_MAINTREE_NUM_SYMBOLS,
 581                                         LZX_MAX_CODEWORD_LEN,
 582                                         freq_tabs->main_freq_table,
 583                                         codes->main_lens,
 584                                         codes->main_codewords);
 585
 586         make_canonical_huffman_code(LZX_LENTREE_NUM_SYMBOLS,
 587                                         LZX_MAX_CODEWORD_LEN,
 588                                         freq_tabs->len_freq_table,
 589                                         codes->len_lens,
 590                                         codes->len_codewords);
 591
 592         make_canonical_huffman_code(LZX_ALIGNEDTREE_NUM_SYMBOLS, 8,
 593                                         freq_tabs->aligned_freq_table,
 594                                         codes->aligned_lens,
 595                                         codes->aligned_codewords);
 596 }
 597
 598 static void
 599 do_call_insn_translation(u32 *call_insn_target, int input_pos,
 600                          s32 file_size)
 601 {
 602         s32 abs_offset;
 603         s32 rel_offset;
 604
 605         rel_offset = le32_to_cpu(*call_insn_target);
 606         if (rel_offset >= -input_pos && rel_offset < file_size) {
 607                 if (rel_offset < file_size - input_pos) {
 608                         /* "good translation" */
 609                         abs_offset = rel_offset + input_pos;
 610                 } else {
 611                         /* "compensating translation" */
 612                         abs_offset = rel_offset - file_size;
 613                 }
 614                 *call_insn_target = cpu_to_le32(abs_offset);
 615         }
 616 }
 617
 618 /* This is the reverse of undo_call_insn_preprocessing() in lzx-decompress.c.
 619  * See the comment above that function for more information. */
 620 static void
 621 do_call_insn_preprocessing(u8 uncompressed_data[], int uncompressed_data_len)
 622 {
 623         for (int i = 0; i < uncompressed_data_len - 10; i++) {
 624                 if (uncompressed_data[i] == 0xe8) {
 625                         do_call_insn_translation((u32*)&uncompressed_data[i + 1],
 626                                                  i,
 627                                                  LZX_WIM_MAGIC_FILESIZE);
 628                         i += 4;
 629                 }
 630         }
 631 }
 632
 633
 634 static const struct lz_params lzx_lz_params = {
 635
 636          /* LZX_MIN_MATCH == 2, but 2-character matches are rarely useful; the
 637           * minimum match for compression is set to 3 instead. */
 638         .min_match      = 3,
 639
 640         .max_match      = LZX_MAX_MATCH,
 641         .good_match     = LZX_MAX_MATCH,
 642         .nice_match     = LZX_MAX_MATCH,
 643         .max_chain_len  = LZX_MAX_MATCH,
 644         .max_lazy_match = LZX_MAX_MATCH,
 645         .too_far        = 4096,
 646 };
 647
 648 /* Documented in wimlib.h */
 649 WIMLIBAPI unsigned
 650 wimlib_lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len,
 651                     void *compressed_data)
 652 {
 653         struct output_bitstream ostream;
 654         u8 uncompressed_data[uncompressed_len + 8];
 655         struct lzx_freq_tables freq_tabs;
 656         struct lzx_codes codes;
 657         u32 match_tab[uncompressed_len];
 658         struct lru_queue queue;
 659         unsigned num_matches;
 660         unsigned compressed_len;
 661         unsigned i;
 662         int ret;
 663         int block_type = LZX_BLOCKTYPE_ALIGNED;
 664
 665         wimlib_assert(uncompressed_len <= 32768);
 666
 667         if (uncompressed_len < 100)
 668                 return 0;
 669
 670         memset(&freq_tabs, 0, sizeof(freq_tabs));
 671         queue.R0 = 1;
 672         queue.R1 = 1;
 673         queue.R2 = 1;
 674
 675         /* The input data must be preprocessed. To avoid changing the original
 676          * input, copy it to a temporary buffer. */
 677         memcpy(uncompressed_data, __uncompressed_data, uncompressed_len);
 678         memset(uncompressed_data + uncompressed_len, 0, 8);
 679
 680         /* Before doing any actual compression, do the call instruction (0xe8
 681          * byte) translation on the uncompressed data. */
 682         do_call_insn_preprocessing(uncompressed_data, uncompressed_len);
 683
 684         /* Determine the sequence of matches and literals that will be output,
 685          * and in the process, keep counts of the number of times each symbol
 686          * will be output, so that the Huffman trees can be made. */
 687
 688         num_matches = lz_analyze_block(uncompressed_data, uncompressed_len,
 689                                        match_tab, lzx_record_match,
 690                                        lzx_record_literal, &freq_tabs,
 691                                        &queue, freq_tabs.main_freq_table,
 692                                        &lzx_lz_params);
 693
 694         lzx_make_huffman_codes(&freq_tabs, &codes);
 695
 696         /* Initialize the output bitstream. */
 697         init_output_bitstream(&ostream, compressed_data, uncompressed_len - 1);
 698
 699         /* The first three bits tell us what kind of block it is, and are one
 700          * of the LZX_BLOCKTYPE_* values.  */
 701         bitstream_put_bits(&ostream, block_type, 3);
 702
 703         /* The next bit indicates whether the block size is the default (32768),
 704          * indicated by a 1 bit, or whether the block size is given by the next
 705          * 16 bits, indicated by a 0 bit. */
 706         if (uncompressed_len == 32768) {
 707                 bitstream_put_bits(&ostream, 1, 1);
 708         } else {
 709                 bitstream_put_bits(&ostream, 0, 1);
 710                 bitstream_put_bits(&ostream, uncompressed_len, 16);
 711         }
 712
 713         /* Write out the aligned offset tree. Note that M$ lies and says that
 714          * the aligned offset tree comes after the length tree, but that is
 715          * wrong; it actually is before the main tree.  */
 716         if (block_type == LZX_BLOCKTYPE_ALIGNED)
 717                 for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++)
 718                         bitstream_put_bits(&ostream, codes.aligned_lens[i],
 719                                            LZX_ALIGNEDTREE_ELEMENT_SIZE);
 720
 721         /* Write the pre-tree and lengths for the first LZX_NUM_CHARS symbols in the
 722          * main tree. */
 723         ret = lzx_write_compressed_tree(&ostream, codes.main_lens,
 724                                         LZX_NUM_CHARS);
 725         if (ret)
 726                 return 0;
 727
 728         /* Write the pre-tree and symbols for the rest of the main tree. */
 729         ret = lzx_write_compressed_tree(&ostream, codes.main_lens +
 730                                         LZX_NUM_CHARS,
 731                                         LZX_MAINTREE_NUM_SYMBOLS -
 732                                                 LZX_NUM_CHARS);
 733         if (ret)
 734                 return 0;
 735
 736         /* Write the pre-tree and symbols for the length tree. */
 737         ret = lzx_write_compressed_tree(&ostream, codes.len_lens,
 738                                         LZX_LENTREE_NUM_SYMBOLS);
 739         if (ret)
 740                 return 0;
 741
 742         /* Write the compressed literals. */
 743         ret = lzx_write_compressed_literals(&ostream, block_type,
 744                                             match_tab, num_matches, &codes);
 745         if (ret)
 746                 return 0;
 747
 748         ret = flush_output_bitstream(&ostream);
 749         if (ret)
 750                 return 0;
 751
 752         compressed_len = ostream.bit_output - (u8*)compressed_data;
 753
 754 #ifdef ENABLE_VERIFY_COMPRESSION
 755         /* Verify that we really get the same thing back when decompressing. */
 756         {
 757                 u8 buf[uncompressed_len];
 758                 ret = wimlib_lzx_decompress(compressed_data, compressed_len,
 759                                             buf, uncompressed_len);
 760                 if (ret != 0) {
 761                         ERROR("lzx_compress(): Failed to decompress data we compressed");
 762                         abort();
 763                 }
 764
 765                 for (i = 0; i < uncompressed_len; i++) {
 766                         if (buf[i] != *((u8*)__uncompressed_data + i)) {
 767                                 ERROR("lzx_compress(): Data we compressed didn't "
 768                                       "decompress to the original data (difference at "
 769                                       "byte %u of %u)", i + 1, uncompressed_len);
 770                                 abort();
 771                         }
 772                 }
 773         }
 774 #endif
 775         return compressed_len;
 776 }