WIMLIB
-This is wimlib version 1.0.2 (September 2012). wimlib can be used to read,
+This is wimlib version 1.0.3 (September 2012). wimlib can be used to read,
write, and mount files in the Windows Imaging Format (WIM files). These
files are normally created by using the `imagex.exe' utility on Windows,
but this library provides a free implementation of imagex for UNIX-based
image of Windows PE that can be put on a CD or USB drive, or published on a
server for PXE booting. See the main page `doc/mkwinpeiso.1' for more details.
+ COMPRESSION RATIO
+
+wimlib can create XPRESS or LZX compressed WIM archives. As of wimlib v1.0.3,
+the XPRESS compression ratio is slightly better than that provided by
+Microsoft's software, while the LZX compression ratio is approaching that of
+Microsoft's software but is not quite there yet. Running time is as good as or
+better than Microsoft's software.
+
+The following tables compare the compression ratio and performance for creating
+a compressed Windows PE image (disk usage of about 524 MB, uncompressed WIM size
+361 MB):
+
+ Table 1. WIM size
+
+ XPRESS Compression LZX Compression
+ wimlib imagex (v1.0.2): 145,283,871 bytes 139,288,293 bytes
+ wimlib imagex (v1.0.3): 139,288,293 bytes 131,379,869 bytes
+ Microsoft imagex.exe: 140,406,981 bytes 127,249,176 bytes
+
+ Table 2. Time to create WIM
+
+ XPRESS Compression LZX Compression
+ wimlib imagex (v1.0.2): 18 sec 49 sec
+ wimlib imagex (v1.0.3): 19 sec 30 sec
+ Microsoft imagex.exe: 25 sec 89 sec
+
+
DEPENDENCIES
* libxml2
#include "comp.h"
#include <string.h>
+#define LZ_MIN_MATCH 3
+
#define HASH_BITS 15
#define HASH_SIZE (1 << HASH_BITS)
#define HASH_MASK (HASH_SIZE - 1)
-#define HASH_SHIFT ((HASH_BITS + 2) / 3)
+
+#if LZ_MIN_MATCH == 2
+# define HASH_SHIFT 8
+#elif LZ_MIN_MATCH == 3
+# define HASH_SHIFT 5
+#else
+#error "Invalid LZ_MIN_MATCH"
+#endif
/* Hash function, based on code from zlib. This function will update and return
* the hash value @hash for the string ending on the additional input character
static inline uint insert_string(u16 hash_tab[], u16 prev_tab[],
const u8 window[], uint str_pos, uint hash)
{
- hash = update_hash(hash, window[str_pos + 2]);
+ hash = update_hash(hash, window[str_pos + LZ_MIN_MATCH - 1]);
prev_tab[str_pos] = hash_tab[hash];
hash_tab[hash] = str_pos;
return hash;
uint nice_match = min(params->nice_match, bytes_remaining);
- const u8 *strend = scan + params->max_match;
+ const u8 *strend = scan + min(params->max_match, bytes_remaining);
u8 scan_end1 = scan[best_len - 1];
u8 scan_end = scan[best_len];
do {
match = &window[cur_match];
-
- /* Skip to next match if the match length cannot increase
- * or if the match length is less than 2. Note that the checks below
- * for insufficient lookahead only occur occasionally for performance
- * reasons. Therefore uninitialized memory will be accessed, and
- * conditional jumps will be made that depend on those values.
- * However the length of the match is limited to the lookahead, so
- * the output of deflate is not affected by the uninitialized values.
+ /* Skip to next match if the match length cannot increase or if
+ * the match length is less than 2. Note that the checks below
+ * for insufficient lookahead only occur occasionally for
+ * performance reasons. Therefore uninitialized memory will be
+ * accessed, and conditional jumps will be made that depend on
+ * those values. However the length of the match is limited to
+ * the lookahead, so the output of deflate is not affected by
+ * the uninitialized values.
*/
- if (match[best_len] != scan_end ||
- match[best_len-1] != scan_end1 ||
- *match != *scan ||
- *++match != scan[1]) continue;
+ if (match[best_len] != scan_end
+ || match[best_len - 1] != scan_end1
+ || *match != *scan
+ || *++match != scan[1])
+ continue;
+ scan++;
- /* The check at best_len-1 can be removed because it will be made
- * again later. (This heuristic is not always a win.)
- * It is not necessary to compare scan[2] and match[2] since they
- * are always equal when the other bytes match, given that
- * the hash keys are equal and that HASH_BITS >= 8.
- */
- scan += 2, match++;
-
- wimlib_assert(*scan == *match);
+ #if 0
+ do {
+ } while (scan < strend && *++match == *++scan);
+ #else
- /* We check for insufficient lookahead only every 8th comparison;
- * the 256th check will be made at strstart+258. */
do {
- } while (*++scan == *++match && *++scan == *++match &&
- *++scan == *++match && *++scan == *++match &&
- *++scan == *++match && *++scan == *++match &&
- *++scan == *++match && *++scan == *++match &&
- scan < strend);
+ } while (
+ *++match == *++scan && *++match == *++scan &&
+ *++match == *++scan && *++match == *++scan &&
+ *++match == *++scan && *++match == *++scan &&
+ *++match == *++scan && *++match == *++scan &&
+ scan < strend);
+ #endif
+ len = match - &window[cur_match];
- len = params->max_match - (int)(strend - scan);
- scan = strend - params->max_match;
+ scan = &window[strstart];
if (len > best_len) {
match_start = cur_match;
scan_end1 = scan[best_len - 1];
scan_end = scan[best_len];
}
- cur_match = prev_tab[cur_match];
- } while (--chain_len != 0 && cur_match != 0);
+ } while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) != 0);
*match_start_ret = match_start;
return min(min(best_len, bytes_remaining), params->max_match);
}
* hash bucket, or 0 if there is no such string */
if (uncompressed_len - cur_input_pos >= params->min_match) {
hash = insert_string(hash_tab, prev_tab,
- uncompressed_data,
- cur_input_pos, hash);
+ uncompressed_data,
+ cur_input_pos, hash);
hash_head = prev_tab[cur_input_pos];
} else {
hash_head = 0;
* avoid a match of the string with itself at the start
* of the input file). */
match_len = longest_match(uncompressed_data,
- uncompressed_len - cur_input_pos,
- cur_input_pos, prev_tab,
- hash_head, prev_len,
- &match_start, params);
+ uncompressed_len - cur_input_pos,
+ cur_input_pos, prev_tab,
+ hash_head, prev_len,
+ &match_start, params);
- if (match_len == params->min_match &&
- cur_input_pos - match_start > params->too_far)
- {
+ if (match_len == params->min_match &&
+ cur_input_pos - match_start > params->too_far)
match_len = params->min_match - 1;
- }
}
/* If there was a match at the previous step and the current
/*prev_len);*/
match = (*record_match)(cur_input_pos - 1 - prev_start,
- prev_len,
- record_match_arg1,
- record_match_arg2);
+ prev_len,
+ record_match_arg1,
+ record_match_arg2);
match_tab[cur_match_pos++] = match;
* enough lookahead, the last two strings are not inserted in
* the hash table.
*/
- prev_len -= 2;
-
- do {
- if (++cur_input_pos <= max_insert) {
- hash = insert_string(hash_tab, prev_tab,
- uncompressed_data,
- cur_input_pos,
- hash);
- }
- } while (--prev_len != 0);
-
+#if LZ_MIN_MATCH == 2
+ if (prev_len >= 3)
+#endif
+ {
+ prev_len -= 2;
+
+ do {
+ if (++cur_input_pos <= max_insert) {
+ hash = insert_string(hash_tab, prev_tab,
+ uncompressed_data,
+ cur_input_pos,
+ hash);
+ }
+ } while (--prev_len != 0);
+ }
match_available = false;
match_len = params->min_match - 1;
} else if (match_available) {