From ac4e9d3b603a8abcc99965ed99576fd0721f8ccb Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers3@gmail.com>
Date: Mon, 10 Sep 2012 20:01:29 -0500
Subject: [PATCH 1/1] Fix up LZ77 compression code and prepare v1.0.3

---
 NEWS             |   6 +++
 README           |  29 ++++++++++-
 TODO             |   1 +
 configure.ac     |   2 +-
 src/lz.c         | 125 +++++++++++++++++++++++++----------------------
 src/lzx-comp.c   |   4 ++
 src/lzx-decomp.c |  13 +++++
 7 files changed, 119 insertions(+), 61 deletions(-)

diff --git a/NEWS b/NEWS
index d2d336c7..820cd79e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,11 @@
 Only the most important changes more recent than version 0.6 are noted here.
 
+Version 1.0.3:
+	LZX and XPRESS compression improvements.
+
+	Fixed calculation of Directory Count, File Count, Total Bytes, and Hard
+	Link Bytes of the WIM.
+
 Version 1.0.2:
 	Fixed bug when capturing NTFS file with multiple named data streams.
 
diff --git a/README b/README
index 8e6d401a..0f15a36f 100644
--- a/README
+++ b/README
@@ -1,6 +1,6 @@
 				   WIMLIB                                    
 
-This is wimlib version 1.0.2 (September 2012).  wimlib can be used to read,
+This is wimlib version 1.0.3 (September 2012).  wimlib can be used to read,
 write, and mount files in the Windows Imaging Format (WIM files).  These
 files are normally created by using the `imagex.exe' utility on Windows,
 but this library provides a free implementation of imagex for UNIX-based
@@ -75,6 +75,33 @@ details.
 image of Windows PE that can be put on a CD or USB drive, or published on a
 server for PXE booting.  See the main page `doc/mkwinpeiso.1' for more details.
 
+			       COMPRESSION RATIO
+
+wimlib can create XPRESS or LZX compressed WIM archives.  As of wimlib v1.0.3,
+the XPRESS compression ratio is slightly better than that provided by
+Microsoft's software, while the LZX compression ratio is approaching that of
+Microsoft's software but is not quite there yet.  Running time is as good as or
+better than Microsoft's software.
+
+The following tables compare the compression ratio and performance for creating
+a compressed Windows PE image (disk usage of about 524 MB, uncompressed WIM size
+361 MB):
+
+	Table 1. WIM size
+
+					XPRESS Compression	LZX Compression
+	wimlib imagex (v1.0.2):		145,283,871 bytes	139,288,293 bytes
+	wimlib imagex (v1.0.3):		139,288,293 bytes	131,379,869 bytes
+	Microsoft imagex.exe:		140,406,981 bytes	127,249,176 bytes
+
+	Table 2. Time to create WIM
+
+					XPRESS Compression	LZX Compression
+	wimlib imagex (v1.0.2):		18 sec			49 sec
+	wimlib imagex (v1.0.3):		19 sec			30 sec
+	Microsoft imagex.exe:		25 sec			89 sec
+
+
 				  DEPENDENCIES
 
 * libxml2
diff --git a/TODO b/TODO
index 16538e77..36ab8e62 100644
--- a/TODO
+++ b/TODO
@@ -1,4 +1,5 @@
 - Fix bugs
 - Add more test cases
 - Implement LZX block splitting to improve compression ratio
+- Implement multi-threaded compression
 - Improve performance of large directories in mounted WIMs.
diff --git a/configure.ac b/configure.ac
index d761ec81..6ccb0155 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([wimlib], [1.0.2], [ebiggers3@gmail.com])
+AC_INIT([wimlib], [1.0.3], [ebiggers3@gmail.com])
 AC_CONFIG_SRCDIR([src/wim.c])
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_AUX_DIR([build-aux])
diff --git a/src/lz.c b/src/lz.c
index 93d07f0d..ddf2017a 100644
--- a/src/lz.c
+++ b/src/lz.c
@@ -29,10 +29,19 @@
 #include "comp.h"
 #include <string.h>
 
+#define LZ_MIN_MATCH 3
+
 #define HASH_BITS	15
 #define HASH_SIZE	(1 << HASH_BITS)
 #define HASH_MASK	(HASH_SIZE - 1)
-#define HASH_SHIFT	((HASH_BITS + 2) /  3)
+
+#if LZ_MIN_MATCH == 2
+#	define HASH_SHIFT	8
+#elif LZ_MIN_MATCH == 3
+#	define HASH_SHIFT	5
+#else
+#error "Invalid LZ_MIN_MATCH"
+#endif
 
 /* Hash function, based on code from zlib.  This function will update and return
  * the hash value @hash for the string ending on the additional input character
@@ -64,7 +73,7 @@ static inline uint update_hash(uint hash, u8 c)
 static inline uint insert_string(u16 hash_tab[], u16 prev_tab[], 
 				 const u8 window[], uint str_pos, uint hash)
 {
-	hash = update_hash(hash, window[str_pos + 2]);
+	hash = update_hash(hash, window[str_pos + LZ_MIN_MATCH - 1]);
 	prev_tab[str_pos] = hash_tab[hash];
 	hash_tab[hash] = str_pos;
 	return hash;
@@ -108,7 +117,7 @@ static uint longest_match(const u8 window[], uint bytes_remaining,
 
 	uint nice_match = min(params->nice_match, bytes_remaining);
 
-	const u8 *strend = scan + params->max_match;
+	const u8 *strend = scan + min(params->max_match, bytes_remaining);
 
 	u8 scan_end1 = scan[best_len - 1];
 	u8 scan_end = scan[best_len];
@@ -121,42 +130,39 @@ static uint longest_match(const u8 window[], uint bytes_remaining,
 	do {
 		match = &window[cur_match];
 
-
-		/* Skip to next match if the match length cannot increase
-		 * or if the match length is less than 2.  Note that the checks below
-		 * for insufficient lookahead only occur occasionally for performance
-		 * reasons.  Therefore uninitialized memory will be accessed, and
-		 * conditional jumps will be made that depend on those values.
-		 * However the length of the match is limited to the lookahead, so
-		 * the output of deflate is not affected by the uninitialized values.
+		/* Skip to next match if the match length cannot increase or if
+		 * the match length is less than 2.  Note that the checks below
+		 * for insufficient lookahead only occur occasionally for
+		 * performance reasons.  Therefore uninitialized memory will be
+		 * accessed, and conditional jumps will be made that depend on
+		 * those values.  However the length of the match is limited to
+		 * the lookahead, so the output of deflate is not affected by
+		 * the uninitialized values.
 		 */
 
-		if (match[best_len]   != scan_end  ||
-			match[best_len-1] != scan_end1 ||
-			*match			!= *scan	 ||
-			*++match		  != scan[1])	  continue;
+		if (match[best_len] != scan_end
+		    || match[best_len - 1] != scan_end1
+		    || *match != *scan
+		    || *++match != scan[1])
+			continue;
+		scan++;
 
-		/* The check at best_len-1 can be removed because it will be made
-		 * again later. (This heuristic is not always a win.)
-		 * It is not necessary to compare scan[2] and match[2] since they
-		 * are always equal when the other bytes match, given that
-		 * the hash keys are equal and that HASH_BITS >= 8.
-		 */
-		scan += 2, match++;
-
-		wimlib_assert(*scan == *match);
+	#if 0
+		do {
+		} while (scan < strend && *++match == *++scan);
+	#else
 
-		/* We check for insufficient lookahead only every 8th comparison;
-		 * the 256th check will be made at strstart+258.  */
 		do {
-		} while (*++scan == *++match && *++scan == *++match &&
-				 *++scan == *++match && *++scan == *++match &&
-				 *++scan == *++match && *++scan == *++match &&
-				 *++scan == *++match && *++scan == *++match &&
-				 scan < strend);
+		} while (
+			 *++match == *++scan && *++match == *++scan &&
+			 *++match == *++scan && *++match == *++scan &&
+			 *++match == *++scan && *++match == *++scan &&
+			 *++match == *++scan && *++match == *++scan &&
+			 scan < strend);
+	#endif
+		len = match - &window[cur_match];
 
-		len = params->max_match - (int)(strend - scan);
-		scan = strend - params->max_match;
+		scan = &window[strstart];
 
 		if (len > best_len) {
 			match_start = cur_match;
@@ -166,8 +172,7 @@ static uint longest_match(const u8 window[], uint bytes_remaining,
 			scan_end1  = scan[best_len - 1];
 			scan_end   = scan[best_len];
 		}
-		cur_match = prev_tab[cur_match];
-	} while (--chain_len != 0 && cur_match != 0);
+	} while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) != 0);
 	*match_start_ret = match_start;
 	return min(min(best_len, bytes_remaining), params->max_match);
 }
@@ -235,8 +240,8 @@ uint lz_analyze_block(const u8 uncompressed_data[], uint uncompressed_len,
 		 * hash bucket, or 0 if there is no such string */
 		if (uncompressed_len - cur_input_pos >= params->min_match) {
 			hash = insert_string(hash_tab, prev_tab, 
-						uncompressed_data, 
-						cur_input_pos, hash);
+					     uncompressed_data, 
+					     cur_input_pos, hash);
 			hash_head = prev_tab[cur_input_pos];
 		} else {
 			hash_head = 0;
@@ -254,16 +259,14 @@ uint lz_analyze_block(const u8 uncompressed_data[], uint uncompressed_len,
 			 * avoid a match of the string with itself at the start
 			 * of the input file).  */
 			match_len = longest_match(uncompressed_data, 
-						uncompressed_len - cur_input_pos,
-						cur_input_pos, prev_tab, 
-						hash_head, prev_len,
-						&match_start, params);
+						  uncompressed_len - cur_input_pos,
+						  cur_input_pos, prev_tab, 
+						  hash_head, prev_len,
+						  &match_start, params);
 
-			if (match_len == params->min_match && 
-				cur_input_pos - match_start > params->too_far) 
-			{
+			if (match_len == params->min_match &&
+			     cur_input_pos - match_start > params->too_far)
 				match_len = params->min_match - 1;
-			}
 		}
 
 		/* If there was a match at the previous step and the current
@@ -280,9 +283,9 @@ uint lz_analyze_block(const u8 uncompressed_data[], uint uncompressed_len,
 					/*prev_len);*/
 
 			match = (*record_match)(cur_input_pos - 1 - prev_start, 
-							prev_len, 
-							record_match_arg1,
-							record_match_arg2);
+						prev_len, 
+						record_match_arg1,
+						record_match_arg2);
 
 			match_tab[cur_match_pos++] = match;
 
@@ -291,17 +294,21 @@ uint lz_analyze_block(const u8 uncompressed_data[], uint uncompressed_len,
 			 * enough lookahead, the last two strings are not inserted in
 			 * the hash table.
 			 */
-			prev_len -= 2;
-
-			do {
-				if (++cur_input_pos <= max_insert) {
-					hash = insert_string(hash_tab, prev_tab,
-								uncompressed_data,
-								cur_input_pos,
-								hash);
-				}
-			} while (--prev_len != 0);
-
+#if LZ_MIN_MATCH == 2
+			if (prev_len >= 3)
+#endif
+			{
+				prev_len -= 2;
+
+				do {
+					if (++cur_input_pos <= max_insert) {
+						hash = insert_string(hash_tab, prev_tab,
+								     uncompressed_data,
+								     cur_input_pos,
+								     hash);
+					}
+				} while (--prev_len != 0);
+			}
 			match_available = false;
 			match_len = params->min_match - 1;
 		} else if (match_available) {
diff --git a/src/lzx-comp.c b/src/lzx-comp.c
index bf120f58..8267a2ea 100644
--- a/src/lzx-comp.c
+++ b/src/lzx-comp.c
@@ -599,7 +599,11 @@ static void do_call_insn_preprocessing(u8 uncompressed_data[],
 
 
 static const struct lz_params lzx_lz_params = {
+
+	 /* LZX_MIN_MATCH == 2, but 2-character matches are rarely useful; the
+	  * minimum match for compression is set to 3 instead. */
 	.min_match      = 3,
+
 	.max_match      = LZX_MAX_MATCH,
 	.good_match	= LZX_MAX_MATCH,
 	.nice_match     = LZX_MAX_MATCH,
diff --git a/src/lzx-decomp.c b/src/lzx-decomp.c
index 8aa0b755..d598d941 100644
--- a/src/lzx-decomp.c
+++ b/src/lzx-decomp.c
@@ -626,8 +626,21 @@ static int lzx_decode_match(int main_element, int block_type,
 		return -1;
 	}
 
+#if 0
+	printf("Match: src %u, dst %u, len %u\n", match_src - window,
+						match_dest - window,
+						match_len);
+	putchar('|');
+	for (i = 0; i < match_len; i++) {
+		match_dest[i] = match_src[i];
+		putchar(match_src[i]);
+	}
+	putchar('|');
+	putchar('\n');
+#else
 	for (i = 0; i < match_len; i++)
 		match_dest[i] = match_src[i];
+#endif
 
 	return match_len;
 }
-- 
2.43.0