X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Flzx-compress.c;h=b23c2df088642ce2d9001ebeba130d8fbba504d8;hb=5218b1d7c83cf9e98ed6276e099844ae0d80abc2;hp=effe656bf20fcd14f65c780e13b7fc78c1d31483;hpb=d66b5c805c4e9a660bac6f979d88c1820cb031f2;p=wimlib

diff --git a/src/lzx-compress.c b/src/lzx-compress.c
index effe656b..b23c2df0 100644
--- a/src/lzx-compress.c
+++ b/src/lzx-compress.c
@@ -7,7 +7,7 @@
 
 /*
  * Copyright (C) 2002 Matthew T. Russotto
- * Copyright (C) 2012 Eric Biggers
+ * Copyright (C) 2012, 2013 Eric Biggers
  *
  * This file is part of wimlib, a library for working with WIM files.
  *
@@ -27,8 +27,8 @@
 
 
 /*
- * This file provides lzx_compress(), a function to compress an in-memory buffer
- * of data using LZX compression, as used in the WIM file format.
+ * This file provides wimlib_lzx_compress(), a function to compress an in-memory
+ * buffer of data using LZX compression, as used in the WIM file format.
  *
  * Please see the comments in lzx-decompress.c for more information about this
  * compression format.
@@ -57,6 +57,7 @@
  * blocks from one input chunk is not yet implemented.
  */
 
+#include "wimlib.h"
 #include "lzx.h"
 #include "compress.h"
 #include <stdlib.h>
@@ -91,7 +92,8 @@ struct lzx_freq_tables {
  * numbers in the lzx_position_base array to calculate the slot directly from
  * the formatted offset without actually looking at the array.
  */
-static inline unsigned lzx_get_position_slot(unsigned formatted_offset)
+static inline unsigned
+lzx_get_position_slot(unsigned formatted_offset)
 {
 #if 0
 	/*
@@ -120,7 +122,8 @@ static inline unsigned lzx_get_position_slot(unsigned formatted_offset)
 	}
 }
 
-static u32 lzx_record_literal(u8 literal, void *__main_freq_tab)
+static u32
+lzx_record_literal(u8 literal, void *__main_freq_tab)
 {
 	freq_t *main_freq_tab = __main_freq_tab;
 	main_freq_tab[literal]++;
@@ -131,12 +134,12 @@ static u32 lzx_record_literal(u8 literal, void *__main_freq_tab)
  * the frequency of symbols in the main, length, and aligned offset alphabets.
  * The return value is a 32-bit number that provides the match in an
  * intermediate representation documented below. */
-static u32 lzx_record_match(unsigned match_offset, unsigned match_len,
-			    void *__freq_tabs, void *__queue)
+static u32
+lzx_record_match(unsigned match_offset, unsigned match_len,
+		 void *__freq_tabs, void *__queue)
 {
 	struct lzx_freq_tables *freq_tabs = __freq_tabs;
 	struct lru_queue *queue = __queue;
-	unsigned formatted_offset;
 	unsigned position_slot;
 	unsigned position_footer = 0;
 	u32 match;
@@ -150,23 +153,20 @@ static u32 lzx_record_match(unsigned match_offset, unsigned match_len,
 
 	/* If possible, encode this offset as a repeated offset. */
 	if (match_offset == queue->R0) {
-		formatted_offset = 0;
-		position_slot    = 0;
+		position_slot = 0;
 	} else if (match_offset == queue->R1) {
 		swap(queue->R0, queue->R1);
-		formatted_offset = 1;
-		position_slot    = 1;
+		position_slot = 1;
 	} else if (match_offset == queue->R2) {
 		swap(queue->R0, queue->R2);
-		formatted_offset = 2;
-		position_slot    = 2;
+		position_slot = 2;
 	} else {
 		/* Not a repeated offset. */
 
 		/* offsets of 0, 1, and 2 are reserved for the repeated offset
 		 * codes, so non-repeated offsets must be encoded as 3+.  The
 		 * minimum offset is 1, so encode the offsets offset by 2. */
-		formatted_offset = match_offset + LZX_MIN_MATCH;
+		unsigned formatted_offset = match_offset + LZX_MIN_MATCH;
 
 		queue->R2 = queue->R1;
 		queue->R1 = queue->R0;
@@ -245,8 +245,9 @@ static u32 lzx_record_match(unsigned match_offset, unsigned match_len,
  * @codes:	Pointer to a structure that contains the codewords for the
  * 			main, length, and aligned offset Huffman codes.
  */
-static int lzx_write_match(struct output_bitstream *out, int block_type,
-			   u32 match, const struct lzx_codes *codes)
+static int
+lzx_write_match(struct output_bitstream *out, int block_type,
+		u32 match, const struct lzx_codes *codes)
 {
 	/* low 8 bits are the match length minus 2 */
 	unsigned match_len_minus_2 = match & 0xff;
@@ -351,11 +352,12 @@ static int lzx_write_match(struct output_bitstream *out, int block_type,
  * @codes:	Pointer to a structure that contains the codewords for the
  * 			main, length, and aligned offset Huffman codes.
  */
-static int lzx_write_compressed_literals(struct output_bitstream *ostream,
-					 int block_type,
-			 		 const u32 match_tab[],
-					 unsigned  num_compressed_literals,
-					 const struct lzx_codes *codes)
+static int
+lzx_write_compressed_literals(struct output_bitstream *ostream,
+			      int block_type,
+			      const u32 match_tab[],
+			      unsigned  num_compressed_literals,
+			      const struct lzx_codes *codes)
 {
 	unsigned i;
 	u32 match;
@@ -401,8 +403,9 @@ static int lzx_write_compressed_literals(struct output_bitstream *ostream,
  * @lens:	The code lengths for the Huffman tree, indexed by symbol.
  * @num_symbols:	The number of symbols in the code.
  */
-static int lzx_write_compressed_tree(struct output_bitstream *out,
-				     const u8 lens[], unsigned num_symbols)
+static int
+lzx_write_compressed_tree(struct output_bitstream *out,
+			  const u8 lens[], unsigned num_symbols)
 {
 	/* Frequencies of the length symbols, including the RLE symbols (NOT the
 	 * actual lengths themselves). */
@@ -564,8 +567,9 @@ static int lzx_write_compressed_tree(struct output_bitstream *out,
 
 /* Builds the canonical Huffman code for the main tree, the length tree, and the
  * aligned offset tree. */
-static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs,
-				struct lzx_codes *codes)
+static void
+lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs,
+		       struct lzx_codes *codes)
 {
 	make_canonical_huffman_code(LZX_MAINTREE_NUM_SYMBOLS,
 					LZX_MAX_CODEWORD_LEN,
@@ -585,8 +589,9 @@ static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs,
 					codes->aligned_codewords);
 }
 
-static void do_call_insn_translation(u32 *call_insn_target, int input_pos,
-				     int32_t file_size)
+static void
+do_call_insn_translation(u32 *call_insn_target, int input_pos,
+			 int32_t file_size)
 {
 	int32_t abs_offset;
 	int32_t rel_offset;
@@ -606,8 +611,8 @@ static void do_call_insn_translation(u32 *call_insn_target, int input_pos,
 
 /* This is the reverse of undo_call_insn_preprocessing() in lzx-decompress.c.
  * See the comment above that function for more information. */
-static void do_call_insn_preprocessing(u8 uncompressed_data[],
-				       int uncompressed_data_len)
+static void
+do_call_insn_preprocessing(u8 uncompressed_data[], int uncompressed_data_len)
 {
 	for (int i = 0; i < uncompressed_data_len - 10; i++) {
 		if (uncompressed_data[i] == 0xe8) {
@@ -634,25 +639,10 @@ static const struct lz_params lzx_lz_params = {
 	.too_far        = 4096,
 };
 
-/*
- * Performs LZX compression on a block of data.
- *
- * @__uncompressed_data:  Pointer to the data to be compressed.
- * @uncompressed_len:	  Length, in bytes, of the data to be compressed.
- * @compressed_data:	  Pointer to a location at least (@uncompressed_len - 1)
- * 				bytes long into which the compressed data may be
- * 				written.
- * @compressed_len_ret:	  A pointer to an unsigned int into which the length of
- * 				the compressed data may be returned.
- *
- * Returns zero if compression was successfully performed.  In that case
- * @compressed_data and @compressed_len_ret will contain the compressed data and
- * its length.  A return value of nonzero means that compressing the data did
- * not reduce its size, and @compressed_data will not contain the full
- * compressed data.
- */
-int lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len,
-		 void *compressed_data, unsigned *compressed_len_ret)
+/* Documented in wimlib.h */
+WIMLIBAPI unsigned
+wimlib_lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len,
+		    void *compressed_data)
 {
 	struct output_bitstream ostream;
 	u8 uncompressed_data[uncompressed_len + 8];
@@ -666,8 +656,10 @@ int lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len,
 	int ret;
 	int block_type = LZX_BLOCKTYPE_ALIGNED;
 
+	wimlib_assert(uncompressed_len <= 32768);
+
 	if (uncompressed_len < 100)
-		return 1;
+		return 0;
 
 	memset(&freq_tabs, 0, sizeof(freq_tabs));
 	queue.R0 = 1;
@@ -677,6 +669,7 @@ int lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len,
 	/* The input data must be preprocessed. To avoid changing the original
 	 * input, copy it to a temporary buffer. */
 	memcpy(uncompressed_data, __uncompressed_data, uncompressed_len);
+	memset(uncompressed_data + uncompressed_len, 0, 8);
 
 	/* Before doing any actual compression, do the call instruction (0xe8
 	 * byte) translation on the uncompressed data. */
@@ -723,55 +716,55 @@ int lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len,
 	 * main tree. */
 	ret = lzx_write_compressed_tree(&ostream, codes.main_lens,
 				        LZX_NUM_CHARS);
-	if (ret != 0)
-		return ret;
+	if (ret)
+		return 0;
 
 	/* Write the pre-tree and symbols for the rest of the main tree. */
 	ret = lzx_write_compressed_tree(&ostream, codes.main_lens +
 					LZX_NUM_CHARS,
 					LZX_MAINTREE_NUM_SYMBOLS -
 						LZX_NUM_CHARS);
-	if (ret != 0)
-		return ret;
+	if (ret)
+		return 0;
 
 	/* Write the pre-tree and symbols for the length tree. */
 	ret = lzx_write_compressed_tree(&ostream, codes.len_lens,
 					LZX_LENTREE_NUM_SYMBOLS);
-	if (ret != 0)
-		return ret;
+	if (ret)
+		return 0;
 
 	/* Write the compressed literals. */
 	ret = lzx_write_compressed_literals(&ostream, block_type,
 					    match_tab, num_matches, &codes);
-	if (ret != 0)
-		return ret;
+	if (ret)
+		return 0;
 
 	ret = flush_output_bitstream(&ostream);
-	if (ret != 0)
-		return ret;
+	if (ret)
+		return 0;
 
 	compressed_len = ostream.bit_output - (u8*)compressed_data;
 
-	*compressed_len_ret = compressed_len;
-
 #ifdef ENABLE_VERIFY_COMPRESSION
 	/* Verify that we really get the same thing back when decompressing. */
-	u8 buf[uncompressed_len];
-	ret = lzx_decompress(compressed_data, compressed_len, buf,
-			     uncompressed_len);
-	if (ret != 0) {
-		ERROR("lzx_compress(): Failed to decompress data we compressed");
-		abort();
-	}
-
-	for (i = 0; i < uncompressed_len; i++) {
-		if (buf[i] != *((u8*)__uncompressed_data + i)) {
-			ERROR("lzx_compress(): Data we compressed didn't "
-			      "decompress to the original data (difference at "
-			      "byte %u of %u)", i + 1, uncompressed_len);
+	{
+		u8 buf[uncompressed_len];
+		ret = wimlib_lzx_decompress(compressed_data, compressed_len,
+					    buf, uncompressed_len);
+		if (ret != 0) {
+			ERROR("lzx_compress(): Failed to decompress data we compressed");
 			abort();
 		}
+
+		for (i = 0; i < uncompressed_len; i++) {
+			if (buf[i] != *((u8*)__uncompressed_data + i)) {
+				ERROR("lzx_compress(): Data we compressed didn't "
+				      "decompress to the original data (difference at "
+				      "byte %u of %u)", i + 1, uncompressed_len);
+				abort();
+			}
+		}
 	}
 #endif
-	return 0;
+	return compressed_len;
 }