Increment real_refcnt for metadata lte's

[wimlib] / src / lzx-decomp.c
diff --git a/src/lzx-decomp.c b/src/lzx-decomp.c

index 92bfb2e9413413a32a6e297878ec4369a8b23d81..73e68127c0d6fa95bf8c9e62e9e49656dc704ea5 100644 (file)
--- a/src/lzx-decomp.c
+++ b/src/lzx-decomp.c
@@ -1,9 +1,9 @@
  /*
   * lzx-decomp.c
   *
- * Routines for LZX decompression.  The LZX format has many similarities to the
- * DEFLATE format used in zlib and gzip, but it's not quite the same.
- *
+ * LZX decompression routines, originally based on code taken from cabextract
+ * v0.5, which was, itself, a modified version of the lzx decompression code
+ * from unlzx.
   */
  
  /*
@@ -26,38 +26,39 @@
   */
  
  /*
- * This file has been modified from code taken from cabextract v0.5, which was,
- * itself, a modified version of the lzx decompression code from unlzx.  The
- * code has been customized for wimlib.
+ * LZX is a LZ77 and Huffman-code based compression format that has many
+ * similarities to the DEFLATE format used in zlib.  The compression ratio is as
+ * good as or better than DEFLATE.  However, in WIM files only up to 32768 bytes
+ * of data can ever be compressed in the same LZX block, so a .tar.gz file could
+ * potentially be smaller than a WIM file that uses LZX compression because it
+ * can use a larger LZ77 window size.
   *
   * Some notes on the LZX compression format as used in Windows Imaging (WIM)
   * files:
   *
- * A compressed WIM file resource consists of a table of chunk offsets followed
- * by compressed chunks.  All compressed chunks except the last decompress to
- * WIM_CHUNK_SIZE (= 32768) bytes.  This is quite similar to the cabinet (.cab)
- * file format, but they are not the same (at least based on M$'s
- * documentation).  According to the documentation, in the cabinet format, the
- * LZX block size is independent from the CFDATA blocks and may span several
- * CFDATA blocks.  However, for WIM file resources, I have seen no case of a LZX
- * block spanning multiple WIM chunks.  This is probably done to make it easier
- * to randomly access the compressed file resources.  WIMLIB in fact makes use
- * of this feature to allow semi-random access to file resources in the
- * read_resource() function.
+ * A compressed WIM resource consists of a table of chunk offsets followed by
+ * the compressed chunks themselves.  All compressed chunks except possibly the
+ * last decompress to WIM_CHUNK_SIZE (= 32768) bytes.  This is quite similar to
+ * the cabinet (.cab) file format, but they are not the same.  According to the
+ * cabinet format documentation, the LZX block size is independent from the
+ * CFDATA blocks, and a LZX block may span several CFDATA blocks.  However, in
+ * WIMs, LZX blocks do not appear to ever span multiple WIM chunks.  Note that
+ * this means any WIM chunk may be decompressed or compressed independently from
+ * any other chunk, which is convenient.
   *
- * Usually a WIM chunk will contain only one LZX block, but on rare occasions it
- * may contain multiple LZX block. The LZX block are usually the aligned block
- * type or verbatim block type, but can (very rarely) be the uncompressed block
- * type.  The size of a LZX block is specified by 1 or 17 bits following the 3
- * bits that specify the block type.  A '1' means to use the default block size
- * (equal to 32768), while a '0' means that the block size is given by the next
- * 16 bits.
+ * A LZX compressed WIM chunk contains one or more LZX blocks of the aligned,
+ * verbatim, or uncompressed block types.  For aligned and verbatim blocks, the
+ * size of the block in uncompressed bytes is specified by a bit following the 3
+ * bits that specify the block type, possibly followed by an additional 16 bits.
+ * '1' means to use the default block size (equal to 32768, the size of a WIM
+ * chunk; this seems to be valid only for the first LZX block in a WIM
+ * chunk), while '0' means that the block size is provided by the next 16 bits.
   *
- * The cabinet format, as documented, allows for the possibility that a CFDATA
- * chunk is up to 6144 bytes larger than the uncompressed data.  In the WIM
- * format, however, it appears that every chunk that would be 32768 bytes or
- * more when compressed, is actually stored uncompressed.  This is not
- * documented by M$.
+ * The cabinet format, as documented, allows for the possibility that a
+ * compressed CFDATA chunk is up to 6144 bytes larger than the data it
+ * uncompresses to.  However, in the WIM format it appears that every chunk that
+ * would be 32768 bytes or more when compressed is actually stored fully
+ * uncompressed.
   *
   * The 'e8' preprocessing step that changes x86 call instructions to use
   * absolute offsets instead of relative offsets relies on a filesize parameter.
@@ -65,11 +66,10 @@
   * the file resource could be used for this purpose), and instead a magic file
   * size of 12000000 is used.  The 'e8' preprocessing is always done, and there
   * is no bit to indicate whether it is done or not.
- *
   */
  
  /*
- * Some more notes about errors in Microsoft's documentation:
+ * Some more notes about errors in Microsoft's LZX documentation:
   *
   * Microsoft's LZX document and their implementation of the com.ms.util.cab Java
   * package do not concur.
@@ -108,9 +108,7 @@
  
  #include "util.h"
  #include "lzx.h"
-
  #include "decomp.h"
-
  #include <string.h>
  
  /* Huffman decoding tables and maps from symbols to code lengths. */
@@ -137,7 +135,7 @@ struct lzx_tables {
   */
  static inline int read_huffsym_using_pretree(struct input_bitstream *istream,
                                              const u16 pretree_decode_table[],
-                                            const u8 pretree_lens[], uint *n)
+                                            const u8 pretree_lens[], unsigned *n)
  {
         return read_huffsym(istream, pretree_decode_table, pretree_lens,
                             LZX_PRETREE_NUM_SYMBOLS, LZX_PRETREE_TABLEBITS, n,
@@ -147,7 +145,7 @@ static inline int read_huffsym_using_pretree(struct input_bitstream *istream,
  /* Reads a Huffman-encoded symbol using the main tree. */
  static inline int read_huffsym_using_maintree(struct input_bitstream *istream,
                                               const struct lzx_tables *tables,
-                                             uint *n)
+                                             unsigned *n)
  {
         return read_huffsym(istream, tables->maintree_decode_table,
                             tables->maintree_lens, LZX_MAINTREE_NUM_SYMBOLS,
@@ -157,7 +155,7 @@ static inline int read_huffsym_using_maintree(struct input_bitstream *istream,
  /* Reads a Huffman-encoded symbol using the length tree. */
  static inline int read_huffsym_using_lentree(struct input_bitstream *istream,
                                              const struct lzx_tables *tables,
-                                            uint *n)
+                                            unsigned *n)
  {
         return read_huffsym(istream, tables->lentree_decode_table,
                             tables->lentree_lens, LZX_LENTREE_NUM_SYMBOLS,
@@ -167,7 +165,7 @@ static inline int read_huffsym_using_lentree(struct input_bitstream *istream,
  /* Reads a Huffman-encoded symbol using the aligned offset tree. */
  static inline int read_huffsym_using_alignedtree(struct input_bitstream *istream,
                                                  const struct lzx_tables *tables,
-                                                uint *n)
+                                                unsigned *n)
  {
         return read_huffsym(istream, tables->alignedtree_decode_table,
                             tables->alignedtree_lens,
@@ -188,14 +186,14 @@ static inline int read_huffsym_using_alignedtree(struct input_bitstream *istream
   *
   */
  static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[],
-                             uint num_lens)
+                             unsigned num_lens)
  {
         /* Declare the decoding table and length table for the pretree. */
         u16 pretree_decode_table[(1 << LZX_PRETREE_TABLEBITS) +
                                         (LZX_PRETREE_NUM_SYMBOLS * 2)];
         u8 pretree_lens[LZX_PRETREE_NUM_SYMBOLS];
-       uint i;
-       uint len;
+       unsigned i;
+       unsigned len;
         int ret;
  
         /* Read the code lengths of the pretree codes.  There are 20 lengths of
@@ -228,10 +226,10 @@ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[],
                  * some number of the next lengths are all 0, or some number of
                  * the next lengths are all equal to the next symbol in the
                  * input. */
-               uint tree_code;
-               uint num_zeroes;
-               uint code;
-               uint num_same;
+               unsigned tree_code;
+               unsigned num_zeroes;
+               unsigned code;
+               unsigned num_same;
                 char value;
  
                 ret = read_huffsym_using_pretree(istream, pretree_decode_table,
@@ -309,16 +307,17 @@ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[],
   *                     blocks, which contain this information in the header)
   */
  static int lzx_read_block_header(struct input_bitstream *istream,
-                                int *block_size_ret, int *block_type_ret,
+                                unsigned *block_size_ret,
+                                unsigned *block_type_ret,
                                  struct lzx_tables *tables,
                                  struct lru_queue *queue)
  {
         int ret;
         int block_type;
-       uint block_size;
+       unsigned block_size;
         int s;
         int i;
-       uint len;
+       unsigned len;
         int32_t R[3];
  
         ret = bitstream_ensure_bits(istream, 4);
@@ -494,14 +493,14 @@ static int lzx_decode_match(int main_element, int block_type,
                             struct lru_queue *queue,
                             struct input_bitstream *istream)
  {
-       uint length_header;
-       uint position_slot;
-       uint match_len;
-       uint match_offset;
-       uint additional_len;
-       uint num_extra_bits;
-       uint verbatim_bits;
-       uint aligned_bits;
+       unsigned length_header;
+       unsigned position_slot;
+       unsigned match_len;
+       unsigned match_offset;
+       unsigned additional_len;
+       unsigned num_extra_bits;
+       unsigned verbatim_bits;
+       unsigned aligned_bits;
         int ret;
         int i;
         u8 *match_dest;
@@ -654,7 +653,7 @@ static int lzx_decode_match(int main_element, int block_type,
   * format as used in other file formats, where a bit is reserved for that
   * purpose. */
  static void undo_call_insn_preprocessing(u8 uncompressed_data[],
-                                        uint uncompressed_data_len)
+                                        unsigned uncompressed_data_len)
  {
         int i = 0;
         int file_size = LZX_MAGIC_FILESIZE;
@@ -706,8 +705,8 @@ static int lzx_decompress_block(int block_type, int block_size, u8 *window,
                                 struct lru_queue *queue,
                                 struct input_bitstream *istream)
  {
-       uint bytes_remaining;
-       uint main_element;
+       unsigned bytes_remaining;
+       unsigned main_element;
         int match_len;
         int ret;
  
@@ -751,16 +750,16 @@ static int lzx_decompress_block(int block_type, int block_size, u8 *window,
   *
   * Return non-zero on failure.
   */
-int lzx_decompress(const void *compressed_data, uint compressed_len,
-                  void *uncompressed_data, uint uncompressed_len)
+int lzx_decompress(const void *compressed_data, unsigned compressed_len,
+                  void *uncompressed_data, unsigned uncompressed_len)
  {
-       struct lzx_tables       tables;
-       struct input_bitstream  istream;
-       struct lru_queue        queue;
-       uint                    bytes_remaining;
+       struct lzx_tables tables;
+       struct input_bitstream istream;
+       struct lru_queue queue;
+       unsigned bytes_remaining;
+       unsigned block_size;
+       unsigned block_type;
         int ret;
-       int block_size;
-       int block_type;
  
         LZX_DEBUG("lzx_decompress (compressed_data = %p, compressed_len = %d, "
                   "uncompressed_data = %p, uncompressed_len = %d).",
@@ -786,17 +785,17 @@ int lzx_decompress(const void *compressed_data, uint compressed_len,
         while (bytes_remaining != 0) {
  
                 LZX_DEBUG("Reading block header.");
-               ret = lzx_read_block_header(&istream, &block_size, &block_type,
-                                                       &tables, &queue);
+               ret = lzx_read_block_header(&istream, &block_size,
+                                           &block_type, &tables, &queue);
                 if (ret != 0)
                         return ret;
  
-               LZX_DEBUG("block_size = %d, bytes_remaining = %d.",
+               LZX_DEBUG("block_size = %u, bytes_remaining = %u",
                           block_size, bytes_remaining);
  
                 if (block_size > bytes_remaining) {
                         ERROR("lzx_decompress(): Expected a block size of at "
-                             "most %d bytes (found %d bytes)",
+                             "most %u bytes (found %u bytes)",
                               bytes_remaining, block_size);
                         return 1;
                 }
@@ -838,7 +837,6 @@ int lzx_decompress(const void *compressed_data, uint compressed_len,
  
                 if (bytes_remaining != 0)
                         LZX_DEBUG("%d bytes remaining.", bytes_remaining);
-
         }
  
         if (uncompressed_len >= 10)