Various cleanups

[wimlib] / src / xpress-decomp.c
diff --git a/src/xpress-decomp.c b/src/xpress-decomp.c

index 89e006d3b22bbae21e69ab4c52d5579f80319891..ce240a09806a43cab6ea23aff13a3934ddf0e13a 100644 (file)
--- a/src/xpress-decomp.c
+++ b/src/xpress-decomp.c
@@ -27,21 +27,19 @@
  
  
  /*
- * The XPRESS compression format is a LZ77-based algorithm.  That means it is
- * quite similar to LZX compression, but XPRESS is slightly simpler, so it is a
- * little faster to compress and decompress.
+ * The XPRESS compression format is an LZ77 and Huffman-code based algorithm.
+ * That means it is quite similar to LZX compression, but XPRESS is slightly
+ * simpler, so it is a little faster to compress and decompress.
   *
   * The XPRESS compression format is mostly documented in a file called "[MS-XCA]
   * Xpress Compression Algorithm".  In the MSDN library, it can currently be
   * found under Open Specifications => Protocols => Windows Protocols => Windows
- * Server Protocols => [MS-XCA] Xpress Compression Algorithm".  Note that
- * Microsoft apparently also has either a slightly different format or an
- * entirely different format that is also called XPRESS.  The other one is
- * supposedly used in Windows' hibernation file or something, but the one used
- * in WIM files is the one described in the above document.
+ * Server Protocols => [MS-XCA] Xpress Compression Algorithm.  The format in
+ * WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm"
+ * (there apparently are some other versions of XPRESS as well).
   *
   * If you are already familiar with the LZ77 algorithm and Huffman coding, the
- * XPRESS format is pretty simple.  The compressed data begins with 256 bytes
+ * XPRESS format is fairly simple.  The compressed data begins with 256 bytes
   * that contain 512 4-bit integers that are the lengths of the symbols in the
   * Huffman tree used for decoding compressed literals.  This is the only Huffman
   * tree that is used for the entirety of the compressed data, and the codeword
@@ -58,7 +56,7 @@
   * The trickiest part is probably the fact that literal bytes for match lengths
   * are encoded "separately" from the bitstream.
   *
- * Also, a caveat--- according to M$'s documentation for XPRESS,
+ * Also, a caveat--- according to Microsoft's documentation for XPRESS,
   *
   *     "Some implementation of the decompression algorithm expect an extra
   *     symbol to mark the end of the data.  Specifically, some implementations
@@ -81,20 +79,30 @@
  #include "decomp.h"
  
  
-/* Decodes @huffsym, a value >= XPRESS_NUM_CHARS, that is the header of a match.
- * */
-static int xpress_decode_match(int huffsym, uint window_pos, uint window_len, 
-                               u8 window[], struct input_bitstream *istream)
+/*
+ * Decodes a symbol @huffsym that begins an XPRESS match.
+ *
+ * The low 8 bits of the symbol are divided into:
+ *
+ * bits 0-3:  length header
+ * bits 4-7:  index of high-order bit of match offset
+ *
+ * Note: taking the low 8 bits of the symbol is the same as subtracting 256, the
+ * number of symbols reserved for literals.
+ */
+static int xpress_decode_match(int huffsym, unsigned window_pos,
+                              unsigned window_len, u8 window[],
+                              struct input_bitstream *istream)
  {
-       uint match_len;
-       uint match_offset;
+       unsigned match_len;
+       unsigned match_offset;
         u8 match_sym = (u8)huffsym;
         u8 len_hdr = match_sym & 0xf;
         u8 offset_bsr = match_sym >> 4;
         int ret;
         u8 *match_dest;
         u8 *match_src;
-       uint i;
+       unsigned i;
  
         ret = bitstream_read_bits(istream, offset_bsr, &match_offset);
         if (ret != 0)
@@ -107,7 +115,6 @@ static int xpress_decode_match(int huffsym, uint window_pos, uint window_len,
                         return -1;
                 match_len = ret;
                 if (match_len == 0xff) {
-
                         ret = bitstream_read_byte(istream);
                         if (ret == -1)
                                 return -1;
@@ -128,7 +135,6 @@ static int xpress_decode_match(int huffsym, uint window_pos, uint window_len,
         }
         match_len += XPRESS_MIN_MATCH;
  
-
         /* Verify that the match is in the bounds of the part of the window
          * currently in use, then copy the source of the match to the current
          * position. */
@@ -137,7 +143,7 @@ static int xpress_decode_match(int huffsym, uint window_pos, uint window_len,
         match_src = match_dest - match_offset;
  
         if (window_pos + match_len > window_len) {
-               ERROR("XPRESS dedecompression error: match of length %d "
+               ERROR("XPRESS decompression error: match of length %d "
                       "bytes overflows window", match_len);
                 return -1;
         }
@@ -157,48 +163,52 @@ static int xpress_decode_match(int huffsym, uint window_pos, uint window_len,
  
  /* Decodes the Huffman-encoded matches and literal bytes in a block of
   * XPRESS-encoded data. */
-static int xpress_decompress_literals(struct input_bitstream *istream, 
-                                     u8 uncompressed_data[], 
-                                     uint uncompressed_len, 
-                                     const u8 lens[], 
+static int xpress_decompress_literals(struct input_bitstream *istream,
+                                     u8 uncompressed_data[],
+                                     unsigned uncompressed_len,
+                                     const u8 lens[],
                                       const u16 decode_table[])
  {
-       uint curpos = 0;
-       uint huffsym;
+       unsigned curpos = 0;
+       unsigned huffsym;
         int match_len;
-       int ret;
+       int ret = 0;
  
         while (curpos < uncompressed_len) {
-               ret = read_huffsym(istream, decode_table, lens, 
-                               XPRESS_NUM_SYMBOLS, XPRESS_TABLEBITS, &huffsym,
-                               XPRESS_MAX_CODEWORD_LEN);
+               ret = read_huffsym(istream, decode_table, lens,
+                                  XPRESS_NUM_SYMBOLS, XPRESS_TABLEBITS,
+                                  &huffsym, XPRESS_MAX_CODEWORD_LEN);
                 if (ret != 0)
-                       return ret;
+                       break;
  
                 if (huffsym < XPRESS_NUM_CHARS) {
                         uncompressed_data[curpos++] = huffsym;
                 } else {
-                       match_len = xpress_decode_match(huffsym, curpos, 
-                                               uncompressed_len, 
-                                               uncompressed_data, istream);
-                       if (match_len == -1)
-                               return 1;
+                       match_len = xpress_decode_match(huffsym,
+                                                       curpos,
+                                                       uncompressed_len,
+                                                       uncompressed_data,
+                                                       istream);
+                       if (match_len == -1) {
+                               ret = 1;
+                               break;
+                       }
                         curpos += match_len;
                 }
         }
-       return 0;
+       return ret;
  }
  
  
-int xpress_decompress(const void *__compressed_data, uint compressed_len, 
-                       void *uncompressed_data, uint uncompressed_len)
+int xpress_decompress(const void *__compressed_data, unsigned compressed_len,
+                     void *uncompressed_data, unsigned uncompressed_len)
  {
         u8 lens[XPRESS_NUM_SYMBOLS];
         u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS];
         struct input_bitstream istream;
         u8 *lens_p;
         const u8 *compressed_data;
-       uint i;
+       unsigned i;
         int ret;
  
         compressed_data = __compressed_data;
@@ -225,9 +235,10 @@ int xpress_decompress(const void *__compressed_data, uint compressed_len,
         if (ret != 0)
                 return ret;
  
-       init_input_bitstream(&istream, compressed_data + XPRESS_NUM_SYMBOLS / 2, 
+       init_input_bitstream(&istream, compressed_data + XPRESS_NUM_SYMBOLS / 2,
                              compressed_len - XPRESS_NUM_SYMBOLS / 2);
  
-       return xpress_decompress_literals(&istream, uncompressed_data, 
-                                       uncompressed_len, lens, decode_table);
+       return xpress_decompress_literals(&istream, uncompressed_data,
+                                         uncompressed_len, lens,
+                                         decode_table);
  }