Indentation

[wimlib] / src / lzx-comp.c
diff --git a/src/lzx-comp.c b/src/lzx-comp.c

index d06b0675d0f89be0db151aac8af12e784307566c..899d1dada6942ce83850a4706f35fb5d25228805 100644 (file)
--- a/src/lzx-comp.c
+++ b/src/lzx-comp.c
@@ -1,33 +1,35 @@
  /*
   * lzx-comp.c
   *
- * LZX compression routines.  
+ * LZX compression routines.
   *
   * This code was originally based on code written by Matthew T. Russotto
   *     (liblzxcomp).
- *
+ */
+
+/*
   * Copyright (C) 2002 Matthew T. Russotto
   * Copyright (C) 2012 Eric Biggers
   *
- * wimlib - Library for working with WIM files 
+ * This file is part of wimlib, a library for working with WIM files.
   *
- * This library is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by the Free
- * Software Foundation; either version 2.1 of the License, or (at your option) any
- * later version.
+ * wimlib is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
   *
- * This library is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
- * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
+ * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
   *
- * You should have received a copy of the GNU Lesser General Public License along
- * with this library; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place, Suite 330, Boston, MA 02111-1307 USA 
+ * You should have received a copy of the GNU General Public License
+ * along with wimlib; if not, see http://www.gnu.org/licenses/.
   */
  
  
  
-/* 
+/*
   * This file provides lzx_compress(), a function to compress an in-memory buffer
   * of data using LZX compression, as used in the WIM file format.
   *
@@ -39,7 +41,7 @@
   * tricky to understand.  Basically it is the following:
   *
   * - Preprocess the input data (LZX-specific)
- * - Go through the input data and determine matches.  This part is based on 
+ * - Go through the input data and determine matches.  This part is based on
   *       code from zlib, and a hash table of 3-character strings is used to
   *       accelerate the process of finding matches.
   * - Build the Huffman trees based on the frequencies of symbols determined
@@ -51,7 +53,6 @@
  
  #include "lzx.h"
  #include "comp.h"
-#include "huffman.h"
  #include <math.h>
  #include <stdlib.h>
  #include <string.h>
@@ -71,7 +72,7 @@ struct lzx_codes {
  };
  
  struct lzx_freq_tables {
-       u32 main_freq_table[LZX_MAINTREE_NUM_SYMBOLS]; 
+       u32 main_freq_table[LZX_MAINTREE_NUM_SYMBOLS];
         u32 len_freq_table[LZX_LENTREE_NUM_SYMBOLS];
         u32 aligned_freq_table[LZX_ALIGNEDTREE_NUM_SYMBOLS];
  };
@@ -91,13 +92,13 @@ static uint lzx_get_position_slot(uint formatted_offset)
         int mid;
  
         /* Calculate position base using binary search of table; if log2 can be
-        * done in hardware, approximation might work; 
+        * done in hardware, approximation might work;
          * trunc(log2(formatted_offset*formatted_offset)) gets either the proper
          * position slot or the next one, except for slots 0, 1, and 39-49
          *
          * Slots 0-1 are handled by the R0-R1 procedures
          *
-        * Slots 36-49 (formatted_offset >= 262144) can be found by 
+        * Slots 36-49 (formatted_offset >= 262144) can be found by
          * (formatted_offset/131072) + 34 == (formatted_offset >> 17) + 34;
          */
         if (formatted_offset >= 262144) {
@@ -132,7 +133,7 @@ static u32 lzx_record_literal(u8 literal, void *__main_freq_tab)
   * alphabets.  The return value is a 32-bit integer that, if the high bit is
   * set, contains the match length, the position slot, and the position footer
   * for the match.  */
-static u32 lzx_record_match(uint match_offset, uint match_len, 
+static u32 lzx_record_match(uint match_offset, uint match_len,
                             void *__freq_tabs, void *__queue)
  {
         struct lzx_freq_tables *freq_tabs = __freq_tabs;
@@ -208,7 +209,7 @@ static u32 lzx_record_match(uint match_offset, uint match_len,
         return match;
  }
  
-/* 
+/*
   * Writes a compressed literal match to the output.
   *
   * @out:         The output bitstream.
@@ -265,7 +266,7 @@ static int lzx_write_match(struct output_bitstream *out, int block_type,
         main_symbol = len_pos_header + LZX_NUM_CHARS;
  
         /* Output main symbol. */
-       ret = bitstream_put_bits(out, codes->main_codewords[main_symbol], 
+       ret = bitstream_put_bits(out, codes->main_codewords[main_symbol],
                                  codes->main_lens[main_symbol]);
         if (ret != 0)
                 return ret;
@@ -288,15 +289,15 @@ static int lzx_write_match(struct output_bitstream *out, int block_type,
          * aligned offset tree.  Otherwise, only the verbatim bits need to be
          * output. */
         if ((block_type == LZX_BLOCKTYPE_ALIGNED) && (num_extra_bits >= 3)) {
-               
+
                 verbatim_bits = position_footer >> 3;
-               ret = bitstream_put_bits(out, verbatim_bits, 
+               ret = bitstream_put_bits(out, verbatim_bits,
                                          num_extra_bits - 3);
                 if (ret != 0)
                         return ret;
  
                 aligned_bits = (position_footer & 7);
-               ret = bitstream_put_bits(out, 
+               ret = bitstream_put_bits(out,
                                          codes->aligned_codewords[aligned_bits],
                                          codes->aligned_lens[aligned_bits]);
                 if (ret != 0)
@@ -311,7 +312,7 @@ static int lzx_write_match(struct output_bitstream *out, int block_type,
         return 0;
  }
  
-/* 
+/*
   * Writes all compressed literals in a block, both matches and literal bytes, to
   * the output bitstream.
   *
@@ -323,9 +324,9 @@ static int lzx_write_match(struct output_bitstream *out, int block_type,
   * @codes:     Pointer to a structure that contains the codewords for the
   *                     main, length, and aligned offset Huffman codes.
   */
-static int lzx_write_compressed_literals(struct output_bitstream *ostream, 
+static int lzx_write_compressed_literals(struct output_bitstream *ostream,
                                          int block_type,
-                                        const u32 match_tab[], 
+                                        const u32 match_tab[],
                                          uint  num_compressed_literals,
                                          const struct lzx_codes *codes)
  {
@@ -340,14 +341,14 @@ static int lzx_write_compressed_literals(struct output_bitstream *ostream,
                  * actual match (1) or a literal uncompressed byte (0) */
                 if (match & 0x80000000) {
                         /* match */
-                       ret = lzx_write_match(ostream, block_type, match, 
+                       ret = lzx_write_match(ostream, block_type, match,
                                               codes);
                         if (ret != 0)
                                 return ret;
                 } else {
                         /* literal byte */
                         wimlib_assert(match < LZX_NUM_CHARS);
-                       ret = bitstream_put_bits(ostream, 
+                       ret = bitstream_put_bits(ostream,
                                                  codes->main_codewords[match],
                                                  codes->main_lens[match]);
                         if (ret != 0)
@@ -357,7 +358,7 @@ static int lzx_write_compressed_literals(struct output_bitstream *ostream,
         return 0;
  }
  
-/* 
+/*
   * Writes a compressed Huffman tree to the output, preceded by the pretree for
   * it.
   *
@@ -373,8 +374,8 @@ static int lzx_write_compressed_literals(struct output_bitstream *ostream,
   * @lens:      The code lengths for the Huffman tree, indexed by symbol.
   * @num_symbols:       The number of symbols in the code.
   */
-static int lzx_write_compressed_tree(struct output_bitstream *out, 
-                               const u8 lens[], 
+static int lzx_write_compressed_tree(struct output_bitstream *out,
+                               const u8 lens[],
                                 uint num_symbols)
  {
         /* Frequencies of the length symbols, including the RLE symbols (NOT the
@@ -469,7 +470,7 @@ static int lzx_write_compressed_tree(struct output_bitstream *out,
                                 if (delta < 0)
                                         delta += 17;
                                 pretree_freqs[19]++;
-                               pretree_freqs[delta]++;
+                               pretree_freqs[(unsigned char)delta]++;
                                 output_syms[output_syms_idx++] = 19;
                                 output_syms[output_syms_idx++] = additional_bits;
                                 output_syms[output_syms_idx++] = delta;
@@ -485,7 +486,7 @@ static int lzx_write_compressed_tree(struct output_bitstream *out,
                         if (delta < 0)
                                 delta += 17;
  
-                       pretree_freqs[delta]++;
+                       pretree_freqs[(unsigned char)delta]++;
                         output_syms[output_syms_idx++] = delta;
                 }
  
@@ -496,14 +497,14 @@ static int lzx_write_compressed_tree(struct output_bitstream *out,
  
         /* Build the pretree from the frequencies of the length symbols. */
  
-       make_canonical_huffman_code(LZX_PRETREE_NUM_SYMBOLS, 
-                                   LZX_MAX_CODEWORD_LEN, 
-                                   pretree_freqs, pretree_lens, 
+       make_canonical_huffman_code(LZX_PRETREE_NUM_SYMBOLS,
+                                   LZX_MAX_CODEWORD_LEN,
+                                   pretree_freqs, pretree_lens,
                                     pretree_codewords);
  
         /* Write the lengths of the pretree codes to the output. */
         for (i = 0; i < LZX_PRETREE_NUM_SYMBOLS; i++)
-               bitstream_put_bits(out, pretree_lens[i], 
+               bitstream_put_bits(out, pretree_lens[i],
                                    LZX_PRETREE_ELEMENT_SIZE);
  
         /* Write the length symbols, encoded with the pretree, to the output. */
@@ -512,7 +513,7 @@ static int lzx_write_compressed_tree(struct output_bitstream *out,
         while (i < output_syms_idx) {
                 pretree_sym = output_syms[i++];
  
-               bitstream_put_bits(out, pretree_codewords[pretree_sym], 
+               bitstream_put_bits(out, pretree_codewords[pretree_sym],
                                    pretree_lens[pretree_sym]);
                 switch (pretree_sym) {
                 case 17:
@@ -523,7 +524,7 @@ static int lzx_write_compressed_tree(struct output_bitstream *out,
                         break;
                 case 19:
                         bitstream_put_bits(out, output_syms[i++], 1);
-                       bitstream_put_bits(out, 
+                       bitstream_put_bits(out,
                                            pretree_codewords[output_syms[i]],
                                            pretree_lens[output_syms[i]]);
                         i++;
@@ -540,20 +541,20 @@ static int lzx_write_compressed_tree(struct output_bitstream *out,
  static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs,
                                 struct lzx_codes *codes)
  {
-       make_canonical_huffman_code(LZX_MAINTREE_NUM_SYMBOLS, 
+       make_canonical_huffman_code(LZX_MAINTREE_NUM_SYMBOLS,
                                         LZX_MAX_CODEWORD_LEN,
-                                       freq_tabs->main_freq_table, 
+                                       freq_tabs->main_freq_table,
                                         codes->main_lens,
                                         codes->main_codewords);
  
-       make_canonical_huffman_code(LZX_LENTREE_NUM_SYMBOLS, 
+       make_canonical_huffman_code(LZX_LENTREE_NUM_SYMBOLS,
                                         LZX_MAX_CODEWORD_LEN,
-                                       freq_tabs->len_freq_table, 
+                                       freq_tabs->len_freq_table,
                                         codes->len_lens,
                                         codes->len_codewords);
  
         make_canonical_huffman_code(LZX_ALIGNEDTREE_NUM_SYMBOLS, 8,
-                                       freq_tabs->aligned_freq_table, 
+                                       freq_tabs->aligned_freq_table,
                                         codes->aligned_lens,
                                         codes->aligned_codewords);
  }
@@ -565,7 +566,7 @@ static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs,
   * no bit to indicate that it actually is used, unlike in the LZX compressed
   * format as used in other file formats such as the cabinet format, where a bit
   * is reserved for that purpose. */
-static void do_call_insn_preprocessing(u8 uncompressed_data[], 
+static void do_call_insn_preprocessing(u8 uncompressed_data[],
                                                 uint uncompressed_data_len)
  {
         int i = 0;
@@ -575,12 +576,12 @@ static void do_call_insn_preprocessing(u8 uncompressed_data[],
  
         /* Not enabled in the last 6 bytes, which means the 5-byte call
          * instruction cannot start in the last *10* bytes. */
-       while (i < uncompressed_data_len - 10) { 
+       while (i < uncompressed_data_len - 10) {
                 if (uncompressed_data[i] != 0xe8) {
                         i++;
                         continue;
                 }
-               rel_offset = to_le32(*(int32_t*)(uncompressed_data + i + 1));
+               rel_offset = le32_to_cpu(*(int32_t*)(uncompressed_data + i + 1));
  
                 if (rel_offset >= -i && rel_offset < file_size) {
                         if (rel_offset < file_size - i) {
@@ -590,7 +591,7 @@ static void do_call_insn_preprocessing(u8 uncompressed_data[],
                                 /* "compensating translation" */
                                 abs_offset = rel_offset - file_size;
                         }
-                       *(int32_t*)(uncompressed_data + i + 1) = to_le32(abs_offset);
+                       *(int32_t*)(uncompressed_data + i + 1) = cpu_to_le32(abs_offset);
                 }
                 i += 5;
         }
@@ -598,7 +599,11 @@ static void do_call_insn_preprocessing(u8 uncompressed_data[],
  
  
  static const struct lz_params lzx_lz_params = {
+
+        /* LZX_MIN_MATCH == 2, but 2-character matches are rarely useful; the
+         * minimum match for compression is set to 3 instead. */
         .min_match      = 3,
+
         .max_match      = LZX_MAX_MATCH,
         .good_match     = LZX_MAX_MATCH,
         .nice_match     = LZX_MAX_MATCH,
@@ -607,7 +612,7 @@ static const struct lz_params lzx_lz_params = {
         .too_far        = 4096,
  };
  
-/* 
+/*
   * Performs LZX compression on a block of data.
   *
   * @__uncompressed_data:  Pointer to the data to be compressed.
@@ -622,7 +627,7 @@ static const struct lz_params lzx_lz_params = {
   * @compressed_data and @compressed_len_ret will contain the compressed data and
   * its length.  A return value of nonzero means that compressing the data did
   * not reduce its size, and @compressed_data will not contain the full
- * compressed data. 
+ * compressed data.
   */
  int lzx_compress(const void *__uncompressed_data, uint uncompressed_len,
                  void *compressed_data, uint *compressed_len_ret)
@@ -637,8 +642,9 @@ int lzx_compress(const void *__uncompressed_data, uint uncompressed_len,
         uint compressed_len;
         uint i;
         int ret;
+       int block_type = LZX_BLOCKTYPE_ALIGNED;
  
-       LZX_DEBUG("uncompressed_len = %u\n", uncompressed_len);
+       LZX_DEBUG("uncompressed_len = %u", uncompressed_len);
  
         if (uncompressed_len < 100)
                 return 1;
@@ -666,8 +672,7 @@ int lzx_compress(const void *__uncompressed_data, uint uncompressed_len,
                                        &queue, freq_tabs.main_freq_table,
                                        &lzx_lz_params);
  
-       LZX_DEBUG("using %u matches\n", num_matches);
-
+       LZX_DEBUG("using %u matches", num_matches);
  
         lzx_make_huffman_codes(&freq_tabs, &codes);
  
@@ -676,7 +681,7 @@ int lzx_compress(const void *__uncompressed_data, uint uncompressed_len,
  
         /* The first three bits tell us what kind of block it is, and are one
          * of the LZX_BLOCKTYPE_* values.  */
-       bitstream_put_bits(&ostream, LZX_BLOCKTYPE_ALIGNED, 3);
+       bitstream_put_bits(&ostream, block_type, 3);
  
         /* The next bit indicates whether the block size is the default (32768),
          * indicated by a 1 bit, or whether the block size is given by the next
@@ -691,33 +696,34 @@ int lzx_compress(const void *__uncompressed_data, uint uncompressed_len,
         /* Write out the aligned offset tree. Note that M$ lies and says that
          * the aligned offset tree comes after the length tree, but that is
          * wrong; it actually is before the main tree.  */
-       for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++)
-               bitstream_put_bits(&ostream, codes.aligned_lens[i], 
-                                  LZX_ALIGNEDTREE_ELEMENT_SIZE);
+       if (block_type == LZX_BLOCKTYPE_ALIGNED)
+               for (i = 0; i < LZX_ALIGNEDTREE_NUM_SYMBOLS; i++)
+                       bitstream_put_bits(&ostream, codes.aligned_lens[i],
+                                          LZX_ALIGNEDTREE_ELEMENT_SIZE);
  
         /* Write the pre-tree and lengths for the first LZX_NUM_CHARS symbols in the
          * main tree. */
-       ret = lzx_write_compressed_tree(&ostream, codes.main_lens, 
+       ret = lzx_write_compressed_tree(&ostream, codes.main_lens,
                                         LZX_NUM_CHARS);
         if (ret != 0)
                 return ret;
  
         /* Write the pre-tree and symbols for the rest of the main tree. */
-       ret = lzx_write_compressed_tree(&ostream, codes.main_lens + 
-                                       LZX_NUM_CHARS, 
-                                       LZX_MAINTREE_NUM_SYMBOLS - 
+       ret = lzx_write_compressed_tree(&ostream, codes.main_lens +
+                                       LZX_NUM_CHARS,
+                                       LZX_MAINTREE_NUM_SYMBOLS -
                                                 LZX_NUM_CHARS);
         if (ret != 0)
                 return ret;
  
         /* Write the pre-tree and symbols for the length tree. */
-       ret = lzx_write_compressed_tree(&ostream, codes.len_lens, 
+       ret = lzx_write_compressed_tree(&ostream, codes.len_lens,
                                         LZX_LENTREE_NUM_SYMBOLS);
         if (ret != 0)
                 return ret;
  
         /* Write the compressed literals. */
-       ret = lzx_write_compressed_literals(&ostream, LZX_BLOCKTYPE_ALIGNED,
+       ret = lzx_write_compressed_literals(&ostream, block_type,
                                             match_tab, num_matches, &codes);
         if (ret != 0)
                 return ret;
@@ -728,31 +734,31 @@ int lzx_compress(const void *__uncompressed_data, uint uncompressed_len,
  
         compressed_len = ostream.bit_output - (u8*)compressed_data;
  
-       LZX_DEBUG("Compressed %u => %u bytes\n",
-                       uncompressed_len, compressed_len);
+       LZX_DEBUG("Compressed %u => %u bytes",
+                 uncompressed_len, compressed_len);
  
         *compressed_len_ret = compressed_len;
  
  #ifdef ENABLE_VERIFY_COMPRESSION
         /* Verify that we really get the same thing back when decompressing. */
+       LZX_DEBUG("Verifying the compressed data.");
         u8 buf[uncompressed_len];
-       ret = lzx_decompress(compressed_data, compressed_len, buf, 
+       ret = lzx_decompress(compressed_data, compressed_len, buf,
                              uncompressed_len);
         if (ret != 0) {
-               ERROR("ERROR: Failed to decompress data we compressed!\n");
-               exit(0);
+               ERROR("lzx_compress(): Failed to decompress data we compressed");
                 abort();
         }
  
         for (i = 0; i < uncompressed_len; i++) {
                 if (buf[i] != *((u8*)__uncompressed_data + i)) {
-                       ERROR("Data we compressed didn't decompress to "
-                               "the original data (difference at byte %u of "
-                               "%u)\n", i + 1, uncompressed_len);
+                       ERROR("lzx_compress(): Data we compressed didn't "
+                             "decompress to the original data (difference at "
+                             "byte %u of %u)", i + 1, uncompressed_len);
                         abort();
                 }
         }
-       LZX_DEBUG("Compression verified to be correct.\n");
+       LZX_DEBUG("Compression verified to be correct.");
  #endif
  
         return 0;