Merge compression updates

author Eric Biggers <ebiggers3@gmail.com>

Sat, 19 Jul 2014 22:11:59 +0000 (17:11 -0500)

committer Eric Biggers <ebiggers3@gmail.com>

Sat, 19 Jul 2014 22:14:31 +0000 (17:14 -0500)
author Eric Biggers <ebiggers3@gmail.com>
Sat, 19 Jul 2014 22:11:59 +0000 (17:11 -0500)
committer Eric Biggers <ebiggers3@gmail.com>
Sat, 19 Jul 2014 22:14:31 +0000 (17:14 -0500)
diff --git a/Makefile.am b/Makefile.am

index 2e1298b6bf8f3f9e94fef248cc4afff118d69be3..b5568c177d11bd8539ce97d7e1a234fb5c7d6c3d 100644 (file)
--- a/Makefile.am
+++ b/Makefile.am
@@ -29,6 +29,7 @@ libwim_la_SOURCES =           \
         src/decompress_common.c \
         src/delete_image.c      \
         src/dentry.c            \
+       src/divsufsort.c        \
         src/encoding.c          \
         src/export_image.c      \
         src/extract.c           \
@@ -40,11 +41,17 @@ libwim_la_SOURCES =         \
         src/iterate_dir.c       \
         src/join.c              \
         src/lookup_table.c      \
+       src/lz_binary_trees.c   \
+       src/lz_brute_force.c    \
+       src/lz_hash_chains.c    \
+       src/lz_lcp_interval_tree.c      \
+       src/lz_linked_suffix_array.c    \
+       src/lz_mf.c             \
+       src/lz_null.c           \
+       src/lz_suffix_array_utils.c     \
         src/lzms-common.c       \
         src/lzms-compress.c     \
         src/lzms-decompress.c   \
-       src/lz_bt.c             \
-       src/lz_hash.c           \
         src/lzx-common.c        \
         src/lzx-compress.c      \
         src/lzx-decompress.c    \
@@ -84,6 +91,7 @@ libwim_la_SOURCES =           \
         include/wimlib/decompressor_ops.h       \
         include/wimlib/decompress_common.h      \
         include/wimlib/dentry.h         \
+       include/wimlib/divsufsort.h     \
         include/wimlib/encoding.h       \
         include/wimlib/endianness.h     \
         include/wimlib/error.h          \
@@ -95,9 +103,9 @@ libwim_la_SOURCES =          \
         include/wimlib/integrity.h      \
         include/wimlib/list.h           \
         include/wimlib/lookup_table.h   \
-       include/wimlib/lz.h             \
-       include/wimlib/lz_bt.h          \
-       include/wimlib/lz_hash.h        \
+       include/wimlib/lz_mf.h          \
+       include/wimlib/lz_mf_ops.h      \
+       include/wimlib/lz_suffix_array_utils.h  \
         include/wimlib/lzms.h           \
         include/wimlib/lzx.h            \
         include/wimlib/metadata.h       \
diff --git a/NEWS b/NEWS

index 82aef67738ef9a5dcdc7951f3f2211aa82ea0491..921e1662fd72d16c462d8a9b682912915ef75a2a 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,19 +1,51 @@
  Version 1.7.1-BETA:
+       Made more improvements to the compression algorithms.
+
+       The default compression mode for wimcapture is now LZX compression in
+       its default mode, which is the same as '--compress=maximum'.
+
+       You can now specify an optional integer compression level to the
+       '--compress' or '--solid-compress' options; e.g.  '--compress=lzx:75'.
+       Currently, the default level in all cases is 50.  This capability
+       deprecates the '--compress-slow' (or '--recompress-slow') option to
+       several wimlib-imagex commands.
+
+       The '--pack-streams', '--pack-compress', and '--pack-chunk-size' options
+       to several wimlib-imagex commands have been deprecated in favor of more
+       "standard" names: '--solid', '--solid-compress', and
+       '--solid-chunk-size', respectively.
+
+       The XPRESS compressor no longer supports chunks larger than 65536 bytes.
+       There is little point in having larger chunks, since the LZ77 sliding
+       window for XPRESS cannot be larger than 65536 bytes.  This change does
+       not affect the default XPRESS chunk size, which is 32768.
+
         The new (as of v1.7.0) extraction code will no longer run out of file
         handles when extracting many (1000+) identical files.
  
-       Library users can now initialize and de-initialize the library multiple
-       times in one run of an application program.
+       Library changes:
+
+               Custom compressor parameters have been removed from the library
+               in favor of the simpler level-based API.
+
+               Decompressor parameters have been removed entirely.
+
+               The maximum XPRESS chunk size has been reduced, as mentioned
+               above.
+
+               Library users can now initialize and de-initialize the library
+               multiple times in one run of an application program.
  
-       Library users will now receive WIMLIB_PROGRESS_MSG_WRITE_STREAMS and
-       WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS messages more frequently when
-       writing or extracting large WIM files.
+               Library users will now receive WIMLIB_PROGRESS_MSG_WRITE_STREAMS
+               and WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS messages more frequently
+               when writing or extracting large WIM files.
  
-       Added experimental new write flag:
-       WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES.
+               Added experimental new write flag:
+               WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES.
  
-       Library users can now specify a custom file for warning and error
-       messages to be sent to, rather than the default of standard error.
+               Library users can now specify a custom file for warning and
+               error messages to be sent to, rather than the default of
+               standard error.
  
  Version 1.7.0:
         Improved compression, decompression, and extraction performance.
diff --git a/README b/README

index 108b8a6cf95c20b04960cb58c2572f036cfb88be..23154f14e6815a6430fd7104bc2551040e1f639e 100644 (file)
--- a/README
+++ b/README
@@ -333,11 +333,8 @@ cabextract project (http://www.cabextract.org.uk).  The LZX compressor
  (www.russotto.net/chm/).  However I have since rewritten and made many
  improvements to both the decompressor and compressor.
  
-lz_hash.c contains LZ77 match-finding code that uses hash chains.  It is based
-on code from zlib but I have since rewritten it.
-
-lz_bt.c contains LZ77 match-finding code that uses binary trees.  It is based on
-code from liblzma but I have since rewritten it.
+lz_binary_trees.c contains LZ77 match-finding code that uses binary trees.  It
+is based on code from liblzma but I have since rewritten it.
  
  A limited number of other free programs can handle some parts of the WIM
  file format:
diff --git a/doc/man1/imagex-capture.1.in b/doc/man1/imagex-capture.1.in

index 465b01b092698f0342bab9aca16f2ba29877429c..6735c28f5bea9a48532d53c9c18e7b58590f21e4 100644 (file)
--- a/doc/man1/imagex-capture.1.in
+++ b/doc/man1/imagex-capture.1.in
@@ -192,29 +192,20 @@ a WIM file created with \fB@IMAGEX_PROGNAME@ capture\fR, while a WIM file
  updated with \fB@IMAGEX_PROGNAME@ append\fR will be written with an integrity
  table if and only if one was present before.
  .TP
-\fB--compress\fR=\fITYPE\fR
-Specifies the compression type for the new WIM file.  This flag is only valid
-for \fB@IMAGEX_PROGNAME@ capture\fR, since the compression mode for
-\fB@IMAGEX_PROGNAME@ append\fR must be the same as that of the existing
-WIM (and is automatically set as such).  \fITYPE\fR may be "none",
-"fast", or "maximum".  As of wimlib v1.5.3, the default is LZX compression, but
-in a special mode that is somewhere in between "fast" and "maximum" in terms of
-speed and compression ratio.  Use \fB--compress\fR=\fImaximum\fR to explicitly
-request a better compression ratio at the cost of more time spent compressing.
-.IP ""
-You may also specify the actual names of the compression algorithms, "XPRESS"
-and "LZX", instead of "fast" and "maximum", respectively.
-.IP ""
-As of wimlib v1.6.0, a third compression type, "recovery" or "LZMS", is also
-available.  Its use is generally not recommended because other than wimlib
-itself, it is only compatible with WIMGAPI Windows 8 and later, and DISM Windows
-8.1 and later.  However, LZMS is the compression algorithm used by default in
-packed resources created if the \fB--pack-streams\fR option is specified.
-.TP
-\fB--compress-slow\fR
-Spend even more time compressing the data to achieve a very slightly better
-compression ratio.  This currently only has an effect for LZX ("maximum", the
-default) and LZMS ("recovery") compression.
+\fB--compress\fR=\fITYPE\fR[:\fILEVEL\fR]
+Specifies the compression format for the new WIM file.  \fITYPE\fR may be
+"none", "XPRESS" (alias: "fast"), "LZX" (alias: "maximum"), or "LZMS" (alias:
+"recovery").  \fITYPE\fR is matched case-insensitively.  The default is "LZX".
+.IP ""
+You can optionally also specify an integer compression \fILEVEL\fR.  The
+compression level specifies how hard the compression algorithm for the specified
+compression \fITYPE\fR will work to compress the data.  The values are scaled so
+that 20 is quick compression, 50 is medium compression, and 100 is high
+compression.  However, you can choose any value, and not just these particular
+values.  The default is 50.
+.IP ""
+Be careful if you choose LZMS compression.  It is not compatible with wimlib
+before v1.6.0, WIMGAPI before Windows 8, DISM before Windows 8.1, and 7-Zip.
  .TP
  \fB--chunk-size\fR=\fISIZE\fR
  Set the WIM compression chunk size to \fISIZE\fR bytes.  Larger chunks mean larger
@@ -225,7 +216,7 @@ decide to use this option regardless, you may choose a chunk size that is
  allowed by the compression format.  All formats only allow power-of-2 chunk
  sizes.  For LZX ("maximum") compression the maximum allowed chunk size is 2^21
  (2097152), for XPRESS ("fast") compression the maximum allowed chunk size is
-2^26 (67108864), and for LZMS ("recovery") compression the maximum allowed chunk
+2^16 (65536), and for LZMS ("recovery") compression the maximum allowed chunk
  size is 2^30 (1073741824).
  .IP ""
  Beware that Microsoft's implementation has limited support for non-default chunk
@@ -234,7 +225,7 @@ open it and crash, or open it and report the data is invalid, or even extract
  the data incorrectly.  In addition, wimlib versions before 1.6.0 do not support
  alternate chunk sizes.
  .TP
-\fB--pack-streams\fR, \fB--solid\fR
+\fB--solid\fR
  Create a "solid" archive that compresses multiple unique streams ("files")
  together, rather than each unique stream ("file") independently.  This can
  result in a significantly better compression ratio, but this format greatly
@@ -243,23 +234,21 @@ mounted with \fB@IMAGEX_PROGNAME@ mount\fR.  Also, WIMs created using this
  option use a different version number in their header and are only compatible
  with WIMGAPI Windows 8 and later, and DISM Windows 8.1 and later.
  .IP ""
-The default compression type and chunk size in packed resources is LZMS with
-2^26 (67108864) byte chunks.  This is independent of the WIM's main compression
-type and chunk size.
+The default compression type and chunk size in solid blocks is LZMS with 2^25
+(33554432) byte chunks.  This is independent of the WIM's main compression type
+and chunk size.
  .TP
-\fB--pack-chunk-size\fR=\fISIZE\fR, \fB--solid-chunk-size\fR=\fISIZE\fR
-Like \fB--chunk-size\fR, but set the chunk size used in packed resources.  The
-default is LZMS compression with 2^26 (67108864) byte chunks.  This option only
-has an effect when \fB--pack-streams\fR is also specified.  For maximum
-compatibility with the Microsoft implementation, do not use either of these
-options.
+\fB--solid-chunk-size\fR=\fISIZE\fR
+Like \fB--chunk-size\fR, but set the chunk size used in solid blocks.  The
+default is LZMS compression with 2^25 (33554432) byte chunks.  This option only
+has an effect when \fB--solid\fR is also specified.  For maximum compatibility
+with the Microsoft implementation, do not use either of these options.
  .TP
-\fB--pack-compress\fR=\fITYPE\fR, \fB--solid-compress\fR=\fITYPE\fR
-Like \fB--compress\fR, but set the compression format used in packed resources.
-The default is LZMS compression with 2^26 (67108864) byte chunks.  This option
-only has an effect when \fB--pack-streams\fR is also specified.  For maximum
-compatibility with the Microsoft implementation, do not use either of these
-options.
+\fB--solid-compress\fR=\fITYPE\fR[:\fILEVEL\fR]
+Like \fB--compress\fR, but set the compression type used in solid blocks.  The
+default is LZMS compression with 2^25 (33554432) byte chunks.  This option only
+has an effect when \fB--solid\fR is also specified.  For maximum compatibility
+with the Microsoft implementation, do not use either of these options.
  .TP
  \fB--threads\fR=\fINUM_THREADS\fR
  Number of threads to use for compressing data.  Default: autodetect (number of
@@ -587,7 +576,7 @@ created only if \fIWIMFILE\fR was specified as "-" (standard output) or if
  the \fB--pipable\fR flag was specified.
  .IP \[bu]
  WIMs captured with a non-default chunk size (with the \fB--chunk-size\fR option)
-or as solid archives (with the \fB--pack-streams\fR option) or with LZMS
+or as solid archives (with the \fB--solid\fR option) or with LZMS
  compression (with \fB--compress\fR=LZMS or \fB--compress\fR=recovery) have
  varying levels of compatibility with Microsoft's software.  Generally, more
  recent versions of Microsoft's software are more compatible.
diff --git a/doc/man1/imagex-export.1.in b/doc/man1/imagex-export.1.in

index 73e01f63b91b7fcf010cfe7af3b71bf22df7c01e..405f6e26b8c68adf6e883658d3ade9352a97671c 100644 (file)
--- a/doc/man1/imagex-export.1.in
+++ b/doc/man1/imagex-export.1.in
@@ -64,49 +64,37 @@ If neither \fB--check\fR nor \fB--nocheck\fR is specified, an integrity
  table is included in \fIDEST_WIMFILE\fR if and only if \fIDEST_WIMFILE\fR
  already existed and it had an integrity table before.
  .TP
-\fB--compress\fR=\fITYPE\fR
-Specifies the compression type for \fIDEST_WIMFILE\fR.  This is only valid if
-\fIDEST_WIMFILE\fR does not yet exist, since if \fIDEST_WIMFILE\fR exists, the
-compression type must be the same as that of \fIDEST_WIMFILE\fR.
+\fB--compress\fR=\fITYPE\fR[:\fILEVEL\fR]
+Specifies the compression type, and optionally the compression level for that
+compression type, for \fIDEST_WIMFILE\fR.  Setting the compression type only has
+an effect if \fIDEST_WIMFILE\fR does not yet exist, since if \fIDEST_WIMFILE\fR
+exists, the compression type must be the same as that of \fIDEST_WIMFILE\fR.
  .IP ""
-\fITYPE\fR may be "none", "maximum", or "fast".  By default, it is the same as
-that of the input WIM file.
-.IP ""
-You may also specify the actual names of the compression algorithms, "XPRESS"
-and "LZX", instead of "fast" and "maximum", respectively.
-.IP ""
-\fITYPE\fR may also be "recovery" (or "LZMS"); however, this will result in
-reduced compatibility.  See the documentation for this option to
-\fB@IMAGEX_PROGNAME@ capture\fR (1) for more details.
+See the documentation for this option to \fB@IMAGEX_PROGNAME@ capture\fR (1) for
+more details.
  .TP
  \fB--recompress\fR
  Force all exported data to be recompressed, even if the destination WIM will use
  the same compression type as the source WIM.
  .TP
-\fB--compress-slow\fR
-Spend even more time compressing the data to achieve a very slightly better
-compression ratio.  This currently only has an effect for LZX ("maximum") and
-LZMS ("recovery") compression.  This option does not itself set the compression
-format.
-.TP
  \fB--chunk-size\fR=\fISIZE\fR
  Set the WIM compression chunk size to \fISIZE\fR.  See the documentation for
  this option to \fB@IMAGEX_PROGNAME@ capture\fR (1) for more details.
  .TP
-\fB--pack-streams\fR, \fB--solid\fR
+\fB--solid\fR
  Create a "solid" archive that compresses multiple files together.  This can
  result in a higher compression ratio, but has disadvantages such as reduced
  compatibility.  See the documentation for this option to \fB@IMAGEX_PROGNAME@
  capture\fR (1) for more details.
  .TP
-\fB--pack-chunk-size\fR=\fISIZE\fR, \fB--solid-chunk-size\fR=\fISIZE\fR
-Like \fB--chunk-size\fR, but set the chunk size used in packed resources.  See
-the documentation for this option to \fB@IMAGEX_PROGNAME@ capture\fR (1) for
-more details.
+\fB--solid-chunk-size\fR=\fISIZE\fR
+Like \fB--chunk-size\fR, but set the chunk size used in solid blocks.  See the
+documentation for this option to \fB@IMAGEX_PROGNAME@ capture\fR (1) for more
+details.
  .TP
-\fB--pack-compress\fR=\fITYPE\fR, \fB--solid-compress\fR=\fITYPE\fR
-Like \fB--compress\fR, but set the compression format used in packed resources.
-See the documentation for this option to \fB@IMAGEX_PROGNAME@ capture\fR (1) for
+\fB--solid-compress\fR=\fITYPE\fR[:\fILEVEL\fR]
+Like \fB--compress\fR, but set the compression type used in solid blocks.  See
+the documentation for this option to \fB@IMAGEX_PROGNAME@ capture\fR (1) for
  more details.
  .TP
  \fB--threads\fR=\fINUM_THREADS\fR
diff --git a/doc/man1/imagex-extract.1.in b/doc/man1/imagex-extract.1.in

index b14caafcd4b576d8c9dbd8ca215b87109c9fea7a..d45605820b60c3c1e758ebb3522692586f310d4a 100644 (file)
--- a/doc/man1/imagex-extract.1.in
+++ b/doc/man1/imagex-extract.1.in
@@ -168,7 +168,7 @@ Unlike \fB@IMAGEX_PROGNAME@ apply\fR, \fB@IMAGEX_PROGNAME@ extract\fR does not
  support extracting files directly to an NTFS volume using libntfs-3g.
  .PP
  wimlib v1.6.0 and later can extract files from version 3584 WIMs, which usually
-use packed, LZMS-compressed streams and may carry the \fI.esd\fR file extension
+contain LZMS-compressed solid blocks and may carry the \fI.esd\fR file extension
  rather than \fI.wim\fR.  However, \fI.esd\fR files downloaded directly by the
  Windows 8 web downloader have encrypted segments, and wimlib cannot extract such
  files until they are first decrypted.  Furthermore, such files are not designed
diff --git a/doc/man1/imagex-mount.1.in b/doc/man1/imagex-mount.1.in

index 2ac40fbf5370000ba253d95564142ff7c0883988..c38c69c2a58157d237c54797ad0e831657115f7d 100644 (file)
--- a/doc/man1/imagex-mount.1.in
+++ b/doc/man1/imagex-mount.1.in
@@ -73,12 +73,12 @@ provide the \fB--rebuild\fR option to \fB@IMAGEX_PROGNAME@ unmount\fR to force
  the WIM to be rebuilt, or else run \fB@IMAGEX_PROGNAME@ optimize\fR on the WIM
  afterwards.
  .PP
-wimlib v1.6.0 and later can mount version 3584 WIMs, which usually use packed,
-LZMS-compressed streams and may carry the \fI.esd\fR file extension rather than
-\fI.wim\fR.  However, such files are not designed for random access, so reading
-data from them when mounted may be very slow.  In addition, \fI.esd\fR files
-downloaded directly by the Windows 8 web downloader have encrypted segments, and
-wimlib cannot mount such files until they are first decrypted.
+wimlib v1.6.0 and later can mount version 3584 WIMs, which usually contain
+LZMS-compressed solid blocks and may carry the \fI.esd\fR file extension rather
+than \fI.wim\fR.  However, such files are not designed for random access, so
+reading data from them when mounted may be very slow.  In addition, \fI.esd\fR
+files downloaded directly by the Windows 8 web downloader have encrypted
+segments, and wimlib cannot mount such files until they are first decrypted.
  .SH MOUNT OPTIONS
  .TP 6
  \fB--check\fR
diff --git a/doc/man1/imagex-optimize.1.in b/doc/man1/imagex-optimize.1.in

index c954dae25a6d9fbd9cecc260a0d5e0c37edcdbe9..67894196b0638ba265a956bab9bfc03cd48908b5 100644 (file)
--- a/doc/man1/imagex-optimize.1.in
+++ b/doc/man1/imagex-optimize.1.in
@@ -32,48 +32,33 @@ uncompressed, but it may result in a better compression ratio if wimlib can do a
  better job than the program that wrote the original file.  A side effect of this
  is that every stream in the original WIM will be checksummed, so this can help
  verify that the WIM is intact (equivalent to applying all the images from it).
-.IP ""
-Note: as of wimlib v1.7.0, wimlib's LZX compressor usually achieves the same or
-better compression than Microsoft's, but is about 10% slower.
-.TP
-\fB--recompress-slow\fR, \fB--compress-slow\fR
-Spend even more time compressing the data in order to achieve a more optimal
-compression ratio.  For LZX ("maximum") compression, compared to the default
-\fB--recompress\fR this will make compression about twice as slow and may
-improve the compression ratio by maybe 1%, depending on the data.  For LZMS
-("recovery") compression this option also has an effect.  For XPRESS ("fast")
-compression this option has no effect; however you may use \fB--compress\fR=LZX
-\fB--recompress-slow\fR to change the compression type to LZX and recompress
-slowly, as per this option.  In any case, this option implies
-\fB--recompress\fR.
  .TP
-\fB--compress\fR=\fITYPE\fR
-Recompress the WIM file using the specified compression type.  \fITYPE\fR may be
-"none", "fast" (or "XPRESS"), or "maximum" (or "LZX").  This implies
+\fB--compress\fR=\fITYPE\fR[:\fILEVEL\fR]
+Recompress the WIM file using the specified compression type, and optionally the
+specified compression level for that compression type.  This implies
  \fB--recompress\fR.
  .IP ""
-\fITYPE\fR may also be "recovery" (or "LZMS"); however, this will result in
-reduced compatibility.  See the documentation for this option to
-\fB@IMAGEX_PROGNAME@ capture\fR (1) for more details.
+See the documentation for this option to \fB@IMAGEX_PROGNAME@ capture\fR (1) for
+more details.
  .TP
  \fB--chunk-size\fR=\fISIZE\fR
  Set the WIM compression chunk size to \fISIZE\fR.  See the documentation for
  this option to \fB@IMAGEX_PROGNAME@ capture\fR (1) for more details.
  .TP
-\fB--pack-streams\fR, \fB--solid\fR
+\fB--solid\fR
  Create a "solid" archive that compresses multiple files together.  This can
  result in a higher compression ratio, but has disadvantages such as reduced
  compatibility.  See the documentation for this option to \fB@IMAGEX_PROGNAME@
  capture\fR (1) for more details.
  .TP
-\fB--pack-chunk-size\fR=\fISIZE\fR, \fB--solid-chunk-size\fR=\fISIZE\fR
-Like \fB--chunk-size\fR, but set the chunk size used in packed resources.  See
-the documentation for this option to \fB@IMAGEX_PROGNAME@ capture\fR (1) for
-more details.
+\fB--solid-chunk-size\fR=\fISIZE\fR
+Like \fB--chunk-size\fR, but set the chunk size used in solid blocks.  See the
+documentation for this option to \fB@IMAGEX_PROGNAME@ capture\fR (1) for more
+details.
  .TP
-\fB--pack-compress\fR=\fITYPE\fR, \fB--solid-compress\fR=\fITYPE\fR
-Like \fB--compress\fR, but set the compression format used in packed resources.
-See the documentation for this option to \fB@IMAGEX_PROGNAME@ capture\fR (1) for
+\fB--solid-compress\fR=\fITYPE\fR[:\fILEVEL\fR]
+Like \fB--compress\fR, but set the compression type used in solid blocks.  See
+the documentation for this option to \fB@IMAGEX_PROGNAME@ capture\fR (1) for
  more details.
  .TP
  \fB--threads\fR=\fINUM_THREADS\fR
diff --git a/examples/Makefile b/examples/Makefile

index 7a4719e9dac487ae092c34480964e4a5f7539433..72914b6e5df0531cd46349e4a3ba7e9610070c1d 100644 (file)
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -1,7 +1,7 @@
  CFLAGS := -Wall
  LDLIBS := -lwim
  
-EXE := applywim capturewim compressfile decompressfile
+EXE := applywim capturewim updatewim compressfile decompressfile
  
  all:$(EXE)
  
diff --git a/examples/compressfile.c b/examples/compressfile.c

index 8ee2a8ae4dd2ed40b835da64c93a363787c7c58a..6887e0e4cae389c676bb023a1a46eab269ac6327 100644 (file)
--- a/examples/compressfile.c
+++ b/examples/compressfile.c
@@ -146,7 +146,7 @@ int main(int argc, char **argv)
  
         /* Create a compressor for the compression type and chunk size with the
          * default parameters.  */
-       ret = wimlib_create_compressor(ctype, chunk_size, NULL, &compressor);
+       ret = wimlib_create_compressor(ctype, chunk_size, 0, &compressor);
         if (ret != 0)
                 error(1, 0, "Failed to create compressor: %s",
                       wimlib_get_error_string(ret));
diff --git a/examples/decompressfile.c b/examples/decompressfile.c

index 8e461d43f86d33110c1171a8325b3d1ca96012c8..0d8bee6b4a85696a405cdbec645b1f1c6ef5a806 100644 (file)
--- a/examples/decompressfile.c
+++ b/examples/decompressfile.c
@@ -136,7 +136,7 @@ int main(int argc, char **argv)
  
         /* Create a decompressor for the compression type and chunk size with
          * the default parameters.  */
-       ret = wimlib_create_decompressor(ctype, chunk_size, NULL, &decompressor);
+       ret = wimlib_create_decompressor(ctype, chunk_size, &decompressor);
         if (ret != 0)
                 error(1, 0, "Failed to create decompressor: %s",
                       wimlib_get_error_string(ret));
diff --git a/examples/updatewim.c b/examples/updatewim.c

new file mode 100644 (file)

index 0000000..910eab1
--- /dev/null
+++ b/examples/updatewim.c
@@ -0,0 +1,97 @@
+/*
+ * updatewim.c - A program to add a file or directory tree to the first image of
+ * a WIM file.
+ *
+ * The author dedicates this file to the public domain.
+ * You can do whatever you want with this file.
+ */
+
+#include <wimlib.h>
+#include <string.h>
+
+int main(int argc, char **argv)
+{
+       int ret;
+       char *wimfile;
+       char *wim_target_path;
+       char *fs_source_path;
+       WIMStruct *wim = NULL;
+
+       /* Check for the correct number of arguments.  */
+       if (argc != 4) {
+               fprintf(stderr, "Usage: updatewim WIMFILE WIM_PATH EXTERNAL_PATH\n");
+               return 2;
+       }
+
+       wimfile = argv[1];
+       wim_target_path = argv[2];
+       fs_source_path = argv[3];
+
+       /* Open the WIM file.  */
+       ret = wimlib_open_wim(wimfile, 0, &wim);
+       if (ret != 0)  /* Always should check the error codes.  */
+               goto out;
+
+       /* Update the WIM image.  In this simple example, we add a single file
+        * or directory tree to the specified location in the first image of the
+        * WIM file, using the default options.
+        *
+        * wimlib_add_tree() is actually sufficient for this case, but for the
+        * sake of demonstration we will use the more general function
+        * wimlib_update_image().  */
+
+       struct wimlib_update_command cmds[1];
+
+       memset(cmds, 0, sizeof(cmds));
+
+       /* Set up an "add" operation.
+        *
+        * Other available operations include WIMLIB_UPDATE_OP_RENAME and
+        * WIMLIB_UPDATE_OP_DELETE.  */
+       cmds[0].op = WIMLIB_UPDATE_OP_ADD;
+
+       /* Set the arguments to the operation.
+        *
+        * Make sure to fill in 'rename' or 'delete_' instead of 'add' if doing
+        * a rename or delete operation instead!  */
+       cmds[0].add.wim_target_path = wim_target_path;
+       cmds[0].add.fs_source_path = fs_source_path;
+
+       /* Note: we don't need to explicitly set 'cmds[0].add.config_file' and
+        * 'cmds[0].add.add_flags' because we zeroed the 'struct
+        * wimlib_update_command', and zero means use the defaults.  */
+
+       ret = wimlib_update_image(wim,  /* WIMStruct to update  */
+                                 1,    /* 1-based index of the image to update  */
+                                 cmds, /* Array of command structures  */
+                                 1,    /* Number of command structures in array  */
+                                 0);   /* WIMLIB_UPDATE_FLAG_* flags (0 for defaults)  */
+       if (ret != 0)
+               goto out;
+
+       /* Overwrite the WIM file.
+        *
+        * Normally, this will append new data to the file, rather than
+        * rebuilding the entire file.
+        *
+        * Changes do not take effect on-disk until this is done.  */
+
+       ret = wimlib_overwrite(wim, /* WIMStruct to commit to the underlying file  */
+                              0,   /* WIMLIB_WRITE_FLAG_* flags (0 for defaults)   */
+                              0);  /* Number of compressor threads (0 means default)  */
+
+out:
+       /* Free the WIMStruct.  Has no effect if the pointer to it is NULL.  */
+       wimlib_free(wim);
+
+       /* Check for error status.  */
+       if (ret != 0) {
+               fprintf(stderr, "wimlib error %d: %s\n",
+                       ret, wimlib_get_error_string(ret));
+       }
+
+       /* Free global memory (optional).  */
+       wimlib_global_cleanup();
+
+       return ret;
+}
diff --git a/include/wimlib.h b/include/wimlib.h

index 3335eba969b15366df4f0e99b56fd1fea4c2dc33..17e5957401a236446f85db7cefcea06a51e5a985 100644 (file)
--- a/include/wimlib.h
+++ b/include/wimlib.h
@@ -458,7 +458,7 @@ enum wimlib_compression_type {
         /** The XPRESS compression format.  This format combines Lempel-Ziv
          * factorization with Huffman encoding.  Compression and decompression
          * are both fast.  This format supports chunk sizes that are powers of 2
-        * between <c>2^12</c> and <c>2^26</c>, inclusively.  */
+        * between <c>2^12</c> and <c>2^16</c>, inclusively.  */
         WIMLIB_COMPRESSION_TYPE_XPRESS = 1,
  
         /** The LZX compression format.  This format combines Lempel-Ziv
@@ -1964,7 +1964,7 @@ typedef int (*wimlib_iterate_lookup_table_callback_t)(const struct wimlib_resour
   * all streams recompressed in solid mode.
   *
   * Currently, new solid blocks will, by default, be written using LZMS
- * compression with 64 MiB (67108864 byte) chunks.  Use
+ * compression with 32 MiB (33554432 byte) chunks.  Use
   * wimlib_set_output_pack_compression_type() and/or
   * wimlib_set_output_pack_chunk_size() to change this.  This is independent of
   * the WIM's main compression type and chunk size; you can have a WIM that
@@ -4279,144 +4279,6 @@ wimlib_write_to_fd(WIMStruct *wim,
   * @{
   */
  
-/** Header for compression parameters to pass to wimlib_create_compressor() or
- * wimlib_set_default_compressor_params().  */
-struct wimlib_compressor_params_header {
-       /** Size of the parameters, in bytes.  */
-       uint32_t size;
-};
-
-/** Header for decompression parameters to pass to wimlib_create_decompressor()
- * or wimlib_set_default_decompressor_params() */
-struct wimlib_decompressor_params_header {
-       /** Size of the parameters, in bytes.  */
-       uint32_t size;
-};
-
-/** LZX compression parameters that can optionally be passed to
- * wimlib_create_compressor() with the compression type
- * ::WIMLIB_COMPRESSION_TYPE_LZX.  */
-struct wimlib_lzx_compressor_params {
-       /** hdr.size Must be set to the size of this structure, in bytes.  */
-       struct wimlib_compressor_params_header hdr;
-
-       /** Relatively fast LZX compression algorithm with a decent compression
-        * ratio.  */
-#define WIMLIB_LZX_ALGORITHM_FAST 0
-
-       /** Slower LZX compression algorithm that provides a better compression
-        * ratio.  This is the default.  */
-#define WIMLIB_LZX_ALGORITHM_SLOW 1
-
-       /** Algorithm to use to perform the compression: either
-        * ::WIMLIB_LZX_ALGORITHM_FAST or ::WIMLIB_LZX_ALGORITHM_SLOW.  The
-        * format is still LZX; this refers to the method the code will use to
-        * perform LZX-compatible compression.  */
-       uint32_t algorithm : 3;
-
-       /** If set to 1, the default parameters for the specified algorithm are
-        * used rather than the ones specified in the following union.  */
-       uint32_t use_defaults : 1;
-
-       union {
-               /** Parameters for the fast algorithm.  */
-               struct wimlib_lzx_fast_params {
-                       uint32_t fast_reserved1[10];
-               } fast;
-
-               /** Parameters for the "slow" algorithm.  */
-               struct wimlib_lzx_slow_params {
-                       /** If set to 1, the compressor can output length 2
-                        * matches.  If set 0, the compressor can only output
-                        * matches of length 3 or greater.  Suggested value: 1
-                        */
-                       uint32_t use_len2_matches : 1;
-
-                       uint32_t slow_reserved1 : 31;
-
-                       /** Matches with length (in bytes) greater than or equal
-                        * to this value are immediately taken without spending
-                        * time on minimum-cost measurements.  Suggested value:
-                        * 32.  */
-                       uint32_t nice_match_length;
-
-                       /** Number of passes to compute a match/literal sequence
-                        * for each LZX block.  This is for an iterative
-                        * algorithm that attempts to minimize the cost of the
-                        * match/literal sequence by using a cost model provided
-                        * by the previous iteration.  Must be at least 1.
-                        * Suggested value: 2.  */
-                       uint32_t num_optim_passes;
-
-                       /** Reserved; set to 0.  */
-                       uint32_t slow_reserved_blocksplit;
-
-                       /** Maximum depth to search for matches at each
-                        * position.  Suggested value: 50.  */
-                       uint32_t max_search_depth;
-
-                       /* Note: max_matches_per_pos has been removed and no
-                        * longer has any effect.  */
-
-                       uint32_t slow_reserved2[3];
-
-                       /** Assumed cost of a main symbol with zero frequency.
-                        * Must be at least 1 and no more than 16.  Suggested
-                        * value: 15.  */
-                       uint8_t main_nostat_cost;
-
-                       /** Assumed cost of a length symbol with zero frequency.
-                        * Must be at least 1 and no more than 16.  Suggested
-                        * value: 15.  */
-                       uint8_t len_nostat_cost;
-
-                       /** Assumed cost of an aligned symbol with zero
-                        * frequency.  Must be at least 1 and no more than 8.
-                        * Suggested value: 7.  */
-                       uint8_t aligned_nostat_cost;
-
-                       uint8_t slow_reserved3[5];
-               } slow;
-       } alg_params;
-};
-
-/** LZMS compression parameters that can optionally be passed to
- * wimlib_create_compressor() with the compression type
- * ::WIMLIB_COMPRESSION_TYPE_LZMS.  */
-struct wimlib_lzms_compressor_params {
-       /** hdr.size Must be set to the size of this structure, in bytes.  */
-       struct wimlib_compressor_params_header hdr;
-
-       /** Minimum match length to output.  This must be at least 2.  Suggested
-        * value: 2  */
-       uint32_t min_match_length;
-
-       /** Maximum match length to output.  This must be at least @p
-        * min_match_length.  Suggested value: @p UINT32_MAX.  */
-       uint32_t max_match_length;
-
-       /** Matches with length (in bytes) greater than or equal to this value
-        * are immediately taken without spending time on minimum-cost
-        * measurements.  The minimum of @p max_match_length and @p
-        * nice_match_length may not exceed 65536.  Suggested value: 32.  */
-       uint32_t nice_match_length;
-
-       /** Maximum depth to search for matches at each position.  Suggested
-        * value: 50.  */
-       uint32_t max_search_depth;
-
-       /* Note: max_matches_per_pos has been removed and no longer has any
-        * effect.  */
-
-       uint32_t reserved1;
-
-       /** Length of the array for the near-optimal LZ parsing algorithm.  This
-        * must be at least 1.  Suggested value: 1024.  */
-       uint32_t optim_array_length;
-
-       uint64_t reserved2[4];
-};
-
  /** Opaque compressor handle.  */
  struct wimlib_compressor;
  
@@ -4424,41 +4286,40 @@ struct wimlib_compressor;
  struct wimlib_decompressor;
  
  /**
- * Set the default compression parameters for the specified compression type.
- * This will affect both explicit and library-internal calls to
+ * Set the default compression level for the specified compression type.  This
+ * will affect both explicit and library-internal calls to
   * wimlib_create_compressor().
   *
   * @param ctype
- *     Compression type for which to set the default compression parameters.
- * @param params
- *     Compression-type specific parameters.  This may be @c NULL, in which
- *     case the "default default" parameters are restored.
+ *     Compression type for which to set the default compression level.  If
+ *     this is the special value -1, the default compression levels for all
+ *     known compression types are set instead.
+ * @param compression_level
+ *     The default compression level to set.  If 0, the "default default" level
+ *     is restored.  Otherwise, a higher value indicates higher compression.
+ *     The values are scaled so that 10 is low compression, 50 is medium
+ *     compression, and 100 is high compression.
   *
   * @return 0 on success; nonzero on error.
   *
   * @retval ::WIMLIB_ERR_INVALID_COMPRESSION_TYPE
- *     @p ctype was not a supported compression type.
- * @retval ::WIMLIB_ERR_INVALID_PARAM
- *     @p params were invalid.
- * @retval ::WIMLIB_ERR_NOMEM
- *     Not enough memory to duplicate the parameters (perhaps @c params->size
- *     was invalid).
+ *     @p ctype was neither a supported compression type nor -1.
   */
  extern int
-wimlib_set_default_compressor_params(enum wimlib_compression_type ctype,
-                                    const struct wimlib_compressor_params_header *params);
+wimlib_set_default_compression_level(enum wimlib_compression_type ctype,
+                                    unsigned int compression_level);
  
  /**
   * Returns the approximate number of bytes needed to allocate a compressor with
   * wimlib_create_compressor() for the specified compression type, block size,
- * and parameters.  @p params may be @c NULL, in which case the current default
- * parameters for @p ctype are used.  Returns 0 if the compression type or
- * parameters are invalid.
+ * and compression level.  @p compression_level may be 0, in which case the
+ * current default compression level for @p ctype is used.  Returns 0 if the
+ * compression type is invalid.
   */
  extern uint64_t
  wimlib_get_compressor_needed_memory(enum wimlib_compression_type ctype,
                                     size_t max_block_size,
-                                   const struct wimlib_compressor_params_header *params);
+                                   unsigned int compression_level);
  
  /**
   * Allocate a compressor for the specified compression type using the specified
@@ -4471,12 +4332,11 @@ wimlib_get_compressor_needed_memory(enum wimlib_compression_type ctype,
   *     Maximum block size to support.  The exact meaning and allowed values for
   *     this parameter depend on the compression type, but it at least specifies
   *     the maximum allowed value for @p uncompressed_size to wimlib_compress().
- * @param extra_params
- *     An optional pointer to extra compressor parameters for the specified
- *     compression type.  For LZX, a pointer to ::wimlib_lzx_compressor_params
- *     may be specified here.  For LZMS, a pointer to
- *     ::wimlib_lzms_compressor_params may be specified here.  If left @c NULL,
- *     the default parameters are used.
+ * @param compression_level
+ *     The compression level to use.  If 0, the default compression level is
+ *     used.  Otherwise, a higher value indicates higher compression.  The
+ *     values are scaled so that 10 is low compression, 50 is medium
+ *     compression, and 100 is high compression.
   * @param compressor_ret
   *     A location into which to return the pointer to the allocated compressor,
   *     which can be used for any number of calls to wimlib_compress() before
@@ -4487,14 +4347,14 @@ wimlib_get_compressor_needed_memory(enum wimlib_compression_type ctype,
   * @retval ::WIMLIB_ERR_INVALID_COMPRESSION_TYPE
   *     @p ctype was not a supported compression type.
   * @retval ::WIMLIB_ERR_INVALID_PARAM
- *     The compression parameters were invalid.
+ *     The compressor does not support the specified maximum block size.
   * @retval ::WIMLIB_ERR_NOMEM
   *     Insufficient memory to allocate the compressor.
   */
  extern int
  wimlib_create_compressor(enum wimlib_compression_type ctype,
                          size_t max_block_size,
-                        const struct wimlib_compressor_params_header *extra_params,
+                        unsigned int compression_level,
                          struct wimlib_compressor **compressor_ret);
  
  /**
@@ -4530,29 +4390,6 @@ wimlib_compress(const void *uncompressed_data, size_t uncompressed_size,
  extern void
  wimlib_free_compressor(struct wimlib_compressor *compressor);
  
-/**
- * Set the default decompression parameters for the specified compression type.
- * This will affect both explicit and library-internal calls to
- * wimlib_create_decompressor().
- *
- * @param ctype
- *     Compression type for which to set the default decompression parameters.
- * @param params
- *     Compression-type specific parameters.  This may be @c NULL, in which
- *     case the "default default" parameters are restored.
- *
- * @return 0 on success; nonzero on error.
- *
- * @retval ::WIMLIB_ERR_INVALID_COMPRESSION_TYPE
- *     @p ctype was not a supported compression type.
- * @retval ::WIMLIB_ERR_NOMEM
- *     Not enough memory to duplicate the parameters (perhaps @c params->size
- *     was invalid).
- */
-extern int
-wimlib_set_default_decompressor_params(enum wimlib_compression_type ctype,
-                                      const struct wimlib_decompressor_params_header *params);
-
  /**
   * Allocate a decompressor for the specified compression type using the
   * specified parameters.  This function is part of wimlib's compression API; it
@@ -4565,9 +4402,6 @@ wimlib_set_default_decompressor_params(enum wimlib_compression_type ctype,
   *     this parameter depend on the compression type, but it at least specifies
   *     the maximum allowed value for @p uncompressed_size to
   *     wimlib_decompress().
- * @param extra_params
- *     An optional pointer to extra decompressor parameters for the specified
- *     compression type.  If @c NULL, the default parameters are used.
   * @param decompressor_ret
   *     A location into which to return the pointer to the allocated
   *     decompressor, which can be used for any number of calls to
@@ -4577,15 +4411,12 @@ wimlib_set_default_decompressor_params(enum wimlib_compression_type ctype,
   *
   * @retval ::WIMLIB_ERR_INVALID_COMPRESSION_TYPE
   *     @p ctype was not a supported compression type.
- * @retval ::WIMLIB_ERR_INVALID_PARAM
- *     The decompression parameters were invalid.
   * @retval ::WIMLIB_ERR_NOMEM
   *     Insufficient memory to allocate the decompressor.
   */
  extern int
  wimlib_create_decompressor(enum wimlib_compression_type ctype,
                            size_t max_block_size,
-                          const struct wimlib_decompressor_params_header *extra_params,
                            struct wimlib_decompressor **decompressor_ret);
  
  /**
diff --git a/include/wimlib/compressor_ops.h b/include/wimlib/compressor_ops.h

index 32bc62c8fb668aa521f5ef080c16a0cbabdffd7b..d155d401ebc543c559119b7a5a8c2a49bb2a71ab 100644 (file)
--- a/include/wimlib/compressor_ops.h
+++ b/include/wimlib/compressor_ops.h
@@ -9,17 +9,13 @@
  
  #include <wimlib/types.h>
  
-struct wimlib_compressor_params_header;
-
  struct compressor_ops {
  
-       bool (*params_valid)(const struct wimlib_compressor_params_header *params);
-
         u64 (*get_needed_memory)(size_t max_block_size,
-                                const struct wimlib_compressor_params_header *params);
+                                unsigned int compression_level);
  
         int (*create_compressor)(size_t max_block_size,
-                                const struct wimlib_compressor_params_header *params,
+                                unsigned int compression_level,
                                  void **private_ret);
  
         size_t (*compress)(const void *uncompressed_data,
@@ -35,7 +31,4 @@ extern const struct compressor_ops lzx_compressor_ops;
  extern const struct compressor_ops xpress_compressor_ops;
  extern const struct compressor_ops lzms_compressor_ops;
  
-extern void
-cleanup_compressor_params(void);
-
  #endif /* _WIMLIB_COMPRESSOR_OPS_H */
diff --git a/include/wimlib/decompressor_ops.h b/include/wimlib/decompressor_ops.h

index 7022dfffc05dc45c075eec2107862d7edaca247a..44148b32b9e83102cb4b5e5e29640f00144ccccb 100644 (file)
--- a/include/wimlib/decompressor_ops.h
+++ b/include/wimlib/decompressor_ops.h
@@ -9,13 +9,9 @@
  
  #include <stddef.h>
  
-struct wimlib_decompressor_params_header;
-
  struct decompressor_ops {
  
-       int (*create_decompressor)(size_t max_block_size,
-                                  const struct wimlib_decompressor_params_header *extra_params,
-                                  void **private_ret);
+       int (*create_decompressor)(size_t max_block_size, void **private_ret);
  
         int (*decompress)(const void *compressed_data,
                           size_t compressed_size,
@@ -30,7 +26,4 @@ extern const struct decompressor_ops lzx_decompressor_ops;
  extern const struct decompressor_ops xpress_decompressor_ops;
  extern const struct decompressor_ops lzms_decompressor_ops;
  
-extern void
-cleanup_decompressor_params(void);
-
  #endif /* _WIMLIB_DECOMPRESSOR_OPS_H */
diff --git a/include/wimlib/divsufsort.h b/include/wimlib/divsufsort.h

new file mode 100644 (file)

index 0000000..287c012
--- /dev/null
+++ b/include/wimlib/divsufsort.h
@@ -0,0 +1,12 @@
+#ifndef _WIMLIB_DIVSUFSORT_H
+#define _WIMLIB_DIVSUFSORT_H
+
+#include "wimlib/types.h"
+
+extern void
+divsufsort(const u8 *T, u32 *SA, u32 n, u32 *bucket_A, u32 *bucket_B);
+
+#define DIVSUFSORT_TMP1_LEN (256)              /* bucket_A  */
+#define DIVSUFSORT_TMP2_LEN (256 * 256)                /* bucket_B  */
+
+#endif /* _WIMLIB_DIVSUFSORT_H */
diff --git a/include/wimlib/lz.h b/include/wimlib/lz.h

deleted file mode 100644 (file)

index 7870c97..0000000
--- a/include/wimlib/lz.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef _WIMLIB_LZ_H
-#define _WIMLIB_LZ_H
-
-#include "wimlib/compress_common.h"
-
-//#define ENABLE_LZ_DEBUG
-#ifdef ENABLE_LZ_DEBUG
-#  define LZ_ASSERT wimlib_assert
-#  include "wimlib/assert.h"
-#else
-#  define LZ_ASSERT(...)
-#endif
-
-
-/* Raw LZ match/literal format: just a length and offset.
- *
- * The length is the number of bytes of the match, and the offset is the number
- * of bytes back in the input the match is from the current position.
- *
- * This can alternatively be used to represent a literal byte if @len is less
- * than the minimum match length.  */
-struct lz_match {
-       u32 len;
-       u32 offset;
-};
-
-#endif /* _WIMLIB_LZ_H */
diff --git a/include/wimlib/lz_bt.h b/include/wimlib/lz_bt.h

deleted file mode 100644 (file)

index 6ce525a..0000000
--- a/include/wimlib/lz_bt.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * lz_bt.h
- *
- * Binary tree match-finder for Lempel-Ziv compression.
- *
- * Author:  Eric Biggers
- * Year:    2014
- *
- * The author dedicates this file to the public domain.
- * You can do whatever you want with this file.
- */
-
-#ifndef _WIMLIB_LZ_BT_H
-#define _WIMLIB_LZ_BT_H
-
-#include "wimlib/types.h"
-
-/* Position type for the binary tree match-finder.
- * This can be changed to 'u16' if no window will exceed 65536 bytes.  */
-typedef u32 lz_bt_pos_t;
-
-/* Match length type for the binary tree match-finder.  */
-typedef unsigned lz_bt_len_t;
-
-/* The binary tree match-finder structure.  */
-struct lz_bt {
-       lz_bt_pos_t *hash_tab;
-       lz_bt_pos_t *digram_tab;
-       lz_bt_pos_t *child_tab;
-       const u8 *cur_window;
-       lz_bt_pos_t cur_window_pos;
-       lz_bt_pos_t cur_window_size;
-       lz_bt_pos_t max_window_size;
-       lz_bt_len_t min_match_len;
-       lz_bt_len_t max_match_len;
-       lz_bt_len_t num_fast_bytes;
-       u32 max_search_depth;
-};
-
-struct lz_match;
-
-extern u64
-lz_bt_get_needed_memory(lz_bt_pos_t max_window_size);
-
-extern bool
-lz_bt_init(struct lz_bt *mf,
-          lz_bt_pos_t max_window_size,
-          lz_bt_len_t min_match_len,
-          lz_bt_len_t max_match_len,
-          lz_bt_len_t num_fast_bytes,
-          u32 max_search_depth);
-
-extern void
-lz_bt_load_window(struct lz_bt *mf, const u8 *window, lz_bt_pos_t window_size);
-
-extern lz_bt_len_t
-lz_bt_get_matches(struct lz_bt *mf, struct lz_match *matches);
-
-static inline lz_bt_pos_t
-lz_bt_get_position(const struct lz_bt *mf)
-{
-       return mf->cur_window_pos;
-}
-
-static inline const u8 *
-lz_bt_get_window_ptr(const struct lz_bt *mf)
-{
-       return &mf->cur_window[mf->cur_window_pos];
-}
-
-static inline lz_bt_pos_t
-lz_bt_get_remaining_size(const struct lz_bt *mf)
-{
-       return mf->cur_window_size - mf->cur_window_pos;
-}
-
-extern void
-lz_bt_skip_positions(struct lz_bt *mf, unsigned n);
-
-extern void
-lz_bt_destroy(struct lz_bt *mf);
-
-#endif /* _WIMLIB_LZ_BT_H */
diff --git a/include/wimlib/lz_hash.h b/include/wimlib/lz_hash.h

deleted file mode 100644 (file)

index 9a856a5..0000000
--- a/include/wimlib/lz_hash.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef _WIMLIB_LZ_HASH_H
-#define _WIMLIB_LZ_HASH_H
-
-#include "wimlib/compress_common.h"
-
-struct lz_params {
-       unsigned min_match;
-       unsigned max_match;
-       unsigned max_offset;
-       unsigned nice_match;
-       unsigned good_match;
-       unsigned max_chain_len;
-       unsigned max_lazy_match;
-       unsigned too_far;
-};
-
-typedef void (*lz_record_match_t)(unsigned len, unsigned offset, void *ctx);
-typedef void (*lz_record_literal_t)(u8 lit, void *ctx);
-
-extern void
-lz_analyze_block(const u8 window[restrict],
-                u32 window_size,
-                lz_record_match_t record_match,
-                lz_record_literal_t record_literal,
-                void *record_ctx,
-                const struct lz_params *params,
-                u32 prev_tab[restrict]);
-
-
-#endif /* _WIMLIB_LZ_HASH_H  */
diff --git a/include/wimlib/lz_mf.h b/include/wimlib/lz_mf.h

new file mode 100644 (file)

index 0000000..c94fce7
--- /dev/null
+++ b/include/wimlib/lz_mf.h
@@ -0,0 +1,406 @@
+/*
+ * lz_mf.h
+ *
+ * Interface for Lempel-Ziv match-finders.
+ *
+ * Copyright (c) 2014 Eric Biggers.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Example usage of the match-finder API:
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Fill in a 'struct lz_mf_params'.
+ * (Optional) Call lz_mf_params_valid() to validate the parameters.
+ * Call lz_mf_alloc() to allocate the match-finder.
+ * For each block of data to be compressed:
+ *     Call lz_mf_load_window() to load the block into the match-finder.
+ *     While the block is not yet fully compressed:
+ *             Call lz_mf_get_matches() to get matches at the current position.
+ *             If matches were found:
+ *                     Output the longest match.
+ *                     Call lz_mf_skip_positions() to skip the remaining length of the match.
+ *             Else:
+ *                     Output a literal.
+ *             End If
+ *     End While
+ * End For
+ * Call lz_mf_free() to free the match-finder.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * That example did "greedy parsing" --- that is, always choosing the longest
+ * match at each position.  However, this interface can be (and is intended to
+ * be) used for "optimal parsing" as well.  It can also be used for in-between
+ * strategies such as "lazy parsing" and "flexible parsing".  For the best
+ * performance, try different match-finding algorithms and parameters to see what
+ * works best for your parsing strategy and your typical data and block sizes.
+ */
+
+#ifndef _WIMLIB_LZ_MF_H
+#define _WIMLIB_LZ_MF_H
+
+#include "wimlib/types.h"
+
+/* When ENABLE_LZ_DEBUG is defined, we check all matches for correctness and
+ * perform other validations.  Use for debugging only, as it slows things down
+ * significantly.  */
+
+//#define ENABLE_LZ_DEBUG
+#ifdef ENABLE_LZ_DEBUG
+#  include <assert.h>
+#  include <string.h>
+#  define LZ_ASSERT assert
+#else
+#  define LZ_ASSERT(...)
+#endif
+
+struct lz_mf;
+
+/* Representation of a Lempel-Ziv match.  */
+struct lz_match {
+       /* The number of bytes matched.  */
+       u32 len;
+
+       /* The offset back from the current position that was matched.  */
+       u32 offset;
+};
+
+/*
+ * Specifies a match-finding algorithm.
+ */
+enum lz_mf_algo {
+
+       /*
+        * Use the default algorithm for the specified maximum window size.
+        */
+       LZ_MF_DEFAULT = 0,
+
+       /*
+        * "Null" algorithm that never reports any matches.
+        *
+        * This algorithm exists for comparison, benchmarking, and testing
+        * purposes only.  It is not intended to be used in real compressors.
+        */
+       LZ_MF_NULL = 1,
+
+       /*
+        * Brute Force match-finding algorithm.
+        *
+        * This algorithm exists for comparison, benchmarking, and testing
+        * purposes only.  It is not intended to be used in real compressors.
+        */
+       LZ_MF_BRUTE_FORCE = 2,
+
+       /*
+        * Hash Chain match-finding algorithm.
+        *
+        * This works well on small windows.
+        *
+        * The memory usage is 4 bytes per position, plus 131072 bytes for a
+        * hash table.
+        *
+        * lz_mf_skip_positions() with this algorithm is very fast, so it's good
+        * if you're doing "greedy" rather than "optimal" parsing.  However, if
+        * using large windows you might be better off with binary trees or
+        * suffix arrays, even if doing greedy parsing.
+        */
+       LZ_MF_HASH_CHAINS = 3,
+
+       /*
+        * Binary Tree match-finding algorithm.
+        *
+        * This works well on small to medium-sized windows.
+        *
+        * The memory usage is 8 bytes per position, plus 262144 bytes for a
+        * hash table.
+        *
+        * lz_mf_skip_positions() with this algorithm takes a significant amount
+        * of time, almost as much as a call to lz_mf_get_matches().  This makes
+        * this algorithm better suited for optimal parsing than for greedy
+        * parsing.  However, if the window size becomes sufficiently large,
+        * this algorithm can outperform hash chains, even when using greedy
+        * parsing.
+        */
+       LZ_MF_BINARY_TREES = 4,
+
+       /*
+        * Longest Common Prefix Interval Tree match-finding algorithm.
+        *
+        * This is a suffix array-based algorithm.  It works well on medium to
+        * large windows.  However, due to an implementation detail, it is
+        * currently limited to a maximum window size of 33554432 bytes.
+        *
+        * The memory usage is 12 bytes per position.
+        *
+        * Unlike the hash chain and binary tree algorithms, the LCP interval
+        * tree algorithm performs most of its work in lz_mf_load_window().  The
+        * calls to lz_mf_get_matches() and lz_mf_skip_positions() take
+        * relatively little time, and lz_mf_skip_positions() is not much faster
+        * than lz_mf_get_matches().  Therefore, if you're using this algorithm
+        * you might as well be doing "optimal" rather than "greedy" parsing.
+        */
+       LZ_MF_LCP_INTERVAL_TREE = 5,
+
+       /*
+        * Linked Suffix Array match-finding algorithm.
+        *
+        * This can be used on very large windows.
+        *
+        * The memory usage is 14 bytes per position.
+        *
+        * Currently, this method usually performs slightly worse than the LCP
+        * interval tree algorithm.  However, it can be used on windows
+        * exceeding the 33554432 byte limit of the LCP interval tree algorithm.
+        */
+       LZ_MF_LINKED_SUFFIX_ARRAY = 6,
+};
+
+/* Parameters for Lempel-Ziv match-finding.  */
+struct lz_mf_params {
+
+       /*
+        * The match-finding algorithm to use.  This must be one of the 'enum
+        * lz_mf_algo' constants defined above.
+        *
+        * If this is LZ_MF_DEFAULT, the default algorithm for the specified
+        * @max_window_size is used.
+        */
+       u32 algorithm;
+
+       /*
+        * The maximum window size, in bytes, that shall be supported by the
+        * match-finder.  This is the maximum size that can be passed to
+        * subsequent calls to lz_mf_load_window().
+        *
+        * Note: this interface is intended to be used for block compression, so
+        * none of the match-finding algorithms support sliding windows.  It's
+        * expected that the window for LZ match-finding simply be the block of
+        * data being compressed.
+        *
+        * Match-finders generally require an amount of memory proportional to
+        * this parameter.  Use lz_mf_get_needed_memory() to query the needed
+        * memory size for a specific match-finding algorithm and maximum window
+        * size.
+        *
+        * This parameter cannot be 0; there is no default value.
+        *
+        * Match-finding algorithms may place additional restrictions on this
+        * parameter.  However, currently only the LCP interval tree
+        * match-finding algorithm places such a restriction (it doesn't support
+        * windows larger than 33554432 bytes).
+        */
+       u32 max_window_size;
+
+       /*
+        * The minimum length, in bytes, of matches that can be produced by the
+        * match-finder (by a call to lz_mf_get_matches()).
+        *
+        * If this parameter is not 0, it must be 2 or greater.
+        *
+        * If this parameter is 0, the match-finding algorithm sets it to a
+        * default value.  The default value will be at least 2 and at most 16.
+        */
+       u32 min_match_len;
+
+       /*
+        * The maximum length, in bytes, of matches that can be produced by the
+        * match-finder (by a call to lz_mf_get_matches()).
+        *
+        * If this parameter is not 0, it must be greater than or equal to
+        * @min_match_len or, if @min_match_len was specified as 0, to the
+        * default value the match-finding algorithm selected for
+        * @min_match_len.
+        *
+        * If this parameter is 0, the match-finding algorithm sets it to a
+        * default value.  In general, the caller must be prepared to handle
+        * arbitrarily long matches (up to the window size minus 1) in this
+        * case.
+        */
+       u32 max_match_len;
+
+       /*
+        * When using the hash chains or binary trees match-finding algorithm,
+        * this parameter defines the maximum number of search steps at each
+        * position.  A typical value to use is 32.  Higher values result in
+        * better matches and slower performance.
+        *
+        * The suffix array-based match-finding algorithms treat this parameter
+        * slightly differently because they find the longest matches first.
+        * They still honor the intent of the parameter but may scale it down to
+        * an appropriate value.
+        *
+        * If this parameter is 0, the match-finding algorithm sets it to a
+        * default value.
+        */
+       u32 max_search_depth;
+
+       /*
+        * When using the hash chains, binary trees, or LCP interval tree
+        * match-finding algorithm, this parameter defines the maximum match
+        * length to which the full algorithm will be applied.  This can also be
+        * thought of as the length above which the algorithm will not try to
+        * search for additional matches.
+        *
+        * Usually, setting this parameter to a reasonable value (such as 24,
+        * 32, or 48) will speed up match-finding but will not hurt the
+        * compression ratio too much.  This is because these settings of this
+        * parameter cause the match-finder to not waste too much time examining
+        * very long matches, which are already highly compressible.
+        *
+        * Note that if the longest match exceeds this length, the
+        * match-finding algorithm will still report its full length.
+        *
+        * The linked suffix array match-finding algorithm ignores this
+        * parameter.
+        *
+        * If this parameter is 0, the match-finding algorithm sets it to a
+        * default value.
+        */
+       u32 nice_match_len;
+};
+
+/*
+ * Lempel-Ziv match-finder operations structure.
+ *
+ * Match-finding algorithms must fill in all members.  None can be left as 0 or
+ * NULL.
+ *
+ * Don't directly access any of the members outside of lz_mf.h and lz_mf.c.
+ * Instead, use the lz_mf_*() wrappers.
+ */
+struct lz_mf_ops {
+       bool (*params_valid)(const struct lz_mf_params *);
+
+       u64 (*get_needed_memory)(u32 max_window_size);
+
+       bool (*init)(struct lz_mf *);
+
+       void (*load_window)(struct lz_mf *mf, const u8 *, u32);
+
+       u32 (*get_matches)(struct lz_mf *, struct lz_match *);
+
+       void (*skip_positions)(struct lz_mf *, u32);
+
+       void (*destroy)(struct lz_mf *);
+
+       size_t struct_size;
+};
+
+/*
+ * Lempel-Ziv match-finder structure.
+ *
+ * Match-finding algorithms must embed this structure inside a private
+ * structure.
+ *
+ * Don't directly access any of the members outside of lz_mf.h, lz_mf.c, and
+ * match-finding algorithms.  Instead, use the lz_mf_*() wrappers.
+ */
+struct lz_mf {
+       struct lz_mf_params params;
+       struct lz_mf_ops ops;
+       const u8 *cur_window;
+       u32 cur_window_pos;
+       u32 cur_window_size;
+};
+
+extern bool
+lz_mf_params_valid(const struct lz_mf_params *params);
+
+extern u64
+lz_mf_get_needed_memory(enum lz_mf_algo algorithm, u32 max_window_size);
+
+extern struct lz_mf *
+lz_mf_alloc(const struct lz_mf_params *params);
+
+extern void
+lz_mf_load_window(struct lz_mf *mf, const u8 *window, u32 size);
+
+#ifdef ENABLE_LZ_DEBUG
+extern u32
+lz_mf_get_matches(struct lz_mf *mf, struct lz_match *matches);
+#else
+/* See non-inline definition for comment  */
+static inline u32
+lz_mf_get_matches(struct lz_mf *mf, struct lz_match *matches)
+{
+       return mf->ops.get_matches(mf, matches);
+}
+#endif
+
+#ifdef ENABLE_LZ_DEBUG
+extern void
+lz_mf_skip_positions(struct lz_mf *mf, u32 n);
+#else
+/* See non-inline definition for comment  */
+static inline void
+lz_mf_skip_positions(struct lz_mf *mf, u32 n)
+{
+       mf->ops.skip_positions(mf, n);
+}
+#endif
+
+extern void
+lz_mf_free(struct lz_mf *mf);
+
+/*
+ * Returns the match-finder's current position in the window.
+ *
+ * The current position begins at 0.  It increases by 1 when lz_mf_get_matches()
+ * is called, and by 'n' when lz_mf_skip_positions() is called.
+ *
+ * Note: The behavior is undefined if the match-finder is advanced beyond the
+ * end of the window.  (If this happens in ENABLE_LZ_DEBUG mode, an assertion
+ * will be triggered.)
+ */
+static inline u32
+lz_mf_get_position(const struct lz_mf *mf)
+{
+       return mf->cur_window_pos;
+}
+
+/*
+ * Returns the number of bytes remaining in the window.
+ */
+static inline u32
+lz_mf_get_bytes_remaining(const struct lz_mf *mf)
+{
+       return mf->cur_window_size - mf->cur_window_pos;
+}
+
+/*
+ * Returns a pointer to the current window, offset by the current position.
+ * Equivalently, this returns a pointer to the byte sequence that the next call
+ * to lz_mf_get_matches() will match against.
+ */
+static inline const u8 *
+lz_mf_get_window_ptr(const struct lz_mf *mf)
+{
+       return &mf->cur_window[mf->cur_window_pos];
+}
+
+#endif /* _WIMLIB_LZ_MF_H */
diff --git a/include/wimlib/lz_mf_ops.h b/include/wimlib/lz_mf_ops.h

new file mode 100644 (file)

index 0000000..0e1cfe9
--- /dev/null
+++ b/include/wimlib/lz_mf_ops.h
@@ -0,0 +1,15 @@
+#ifndef _WIMLIB_LZ_MF_OPS_H
+#define _WIMLIB_LZ_MF_OPS_H
+
+#include "wimlib/lz_mf.h"
+
+/* Operation tables (struct lz_mf_ops), one per match-finder implementation.  */
+
+extern const struct lz_mf_ops lz_null_ops;
+extern const struct lz_mf_ops lz_brute_force_ops;
+extern const struct lz_mf_ops lz_hash_chains_ops;
+extern const struct lz_mf_ops lz_binary_trees_ops;
+extern const struct lz_mf_ops lz_lcp_interval_tree_ops;
+extern const struct lz_mf_ops lz_linked_suffix_array_ops;
+
+#endif /* _WIMLIB_LZ_MF_OPS_H */
diff --git a/include/wimlib/lz_suffix_array_utils.h b/include/wimlib/lz_suffix_array_utils.h

new file mode 100644 (file)

index 0000000..d4dcbe8
--- /dev/null
+++ b/include/wimlib/lz_suffix_array_utils.h
@@ -0,0 +1,17 @@
+#ifndef _WIMLIB_LZ_SUFFIX_ARRAY_UTILS_H
+#define _WIMLIB_LZ_SUFFIX_ARRAY_UTILS_H
+
+#include "wimlib/types.h"
+
+#define BUILD_SA_MIN_TMP_LEN (65536 + 256)
+
+extern void
+build_SA(u32 *SA, const u8 *T, u32 n, u32 *tmp);
+
+extern void
+build_ISA(u32 *ISA, const u32 *SA, u32 n);
+
+extern void
+build_LCP(u32 *LCP, const u32 *SA, const u32 *ISA, const u8 *T, u32 n);
+
+#endif /* _WIMLIB_LZ_SUFFIX_ARRAY_UTILS_H */
diff --git a/include/wimlib/lzx.h b/include/wimlib/lzx.h

index ae1c6a9c696ec28bedae3fca69675d48776ac302..539edf303591eb168c53820486b7f7e794f3abea 100644 (file)
--- a/include/wimlib/lzx.h
+++ b/include/wimlib/lzx.h
@@ -147,7 +147,7 @@ struct lzx_lru_queue {
  #ifdef __x86_64__
  _aligned_attribute(8)  /* Improves performance of LZX compression by 1% - 2%;
                           specifically, this speeds up
-                         lzx_get_near_optimal_match().  */
+                         lzx_choose_near_optimal_match().  */
  #endif
  ;
  
diff --git a/programs/imagex.c b/programs/imagex.c

index 1c077b7f2fae1afb1db12fc53cf0c99b8894c4a1..6dd8bdbb03df7b4f9b125cc9e9c328dd227a5839 100644 (file)
--- a/programs/imagex.c
+++ b/programs/imagex.c
@@ -171,9 +171,6 @@ enum {
         IMAGEX_NO_GLOBS_OPTION,
         IMAGEX_NULLGLOB_OPTION,
         IMAGEX_ONE_FILE_ONLY_OPTION,
-       IMAGEX_PACK_CHUNK_SIZE_OPTION,
-       IMAGEX_PACK_COMPRESS_OPTION,
-       IMAGEX_PACK_STREAMS_OPTION,
         IMAGEX_PATH_OPTION,
         IMAGEX_PIPABLE_OPTION,
         IMAGEX_PRESERVE_DIR_STRUCTURE_OPTION,
@@ -184,6 +181,9 @@ enum {
         IMAGEX_RESUME_OPTION,
         IMAGEX_RPFIX_OPTION,
         IMAGEX_SOFT_OPTION,
+       IMAGEX_SOLID_OPTION,
+       IMAGEX_SOLID_CHUNK_SIZE_OPTION,
+       IMAGEX_SOLID_COMPRESS_OPTION,
         IMAGEX_SOURCE_LIST_OPTION,
         IMAGEX_STAGING_DIR_OPTION,
         IMAGEX_STREAMS_INTERFACE_OPTION,
@@ -225,12 +225,12 @@ static const struct option capture_or_append_options[] = {
         {T("compress"),    required_argument, NULL, IMAGEX_COMPRESS_OPTION},
         {T("compress-slow"), no_argument,     NULL, IMAGEX_COMPRESS_SLOW_OPTION},
         {T("chunk-size"),  required_argument, NULL, IMAGEX_CHUNK_SIZE_OPTION},
-       {T("pack-chunk-size"), required_argument, NULL, IMAGEX_PACK_CHUNK_SIZE_OPTION},
-       {T("solid-chunk-size"),required_argument, NULL, IMAGEX_PACK_CHUNK_SIZE_OPTION},
-       {T("pack-compress"), required_argument, NULL, IMAGEX_PACK_COMPRESS_OPTION},
-       {T("solid-compress"),required_argument, NULL, IMAGEX_PACK_COMPRESS_OPTION},
-       {T("pack-streams"), no_argument,      NULL, IMAGEX_PACK_STREAMS_OPTION},
-       {T("solid"),       no_argument,      NULL, IMAGEX_PACK_STREAMS_OPTION},
+       {T("solid"),       no_argument,      NULL, IMAGEX_SOLID_OPTION},
+       {T("pack-streams"), no_argument,      NULL, IMAGEX_SOLID_OPTION},
+       {T("solid-compress"),required_argument, NULL, IMAGEX_SOLID_COMPRESS_OPTION},
+       {T("pack-compress"), required_argument, NULL, IMAGEX_SOLID_COMPRESS_OPTION},
+       {T("solid-chunk-size"),required_argument, NULL, IMAGEX_SOLID_CHUNK_SIZE_OPTION},
+       {T("pack-chunk-size"), required_argument, NULL, IMAGEX_SOLID_CHUNK_SIZE_OPTION},
         {T("config"),      required_argument, NULL, IMAGEX_CONFIG_OPTION},
         {T("dereference"), no_argument,       NULL, IMAGEX_DEREFERENCE_OPTION},
         {T("flags"),       required_argument, NULL, IMAGEX_FLAGS_OPTION},
@@ -273,13 +273,13 @@ static const struct option export_options[] = {
         {T("compress"),    required_argument, NULL, IMAGEX_COMPRESS_OPTION},
         {T("recompress"),  no_argument,       NULL, IMAGEX_RECOMPRESS_OPTION},
         {T("compress-slow"), no_argument,     NULL, IMAGEX_COMPRESS_SLOW_OPTION},
-       {T("pack-streams"),no_argument,       NULL, IMAGEX_PACK_STREAMS_OPTION},
-       {T("solid"),       no_argument,       NULL, IMAGEX_PACK_STREAMS_OPTION},
         {T("chunk-size"),  required_argument, NULL, IMAGEX_CHUNK_SIZE_OPTION},
-       {T("pack-chunk-size"), required_argument, NULL, IMAGEX_PACK_CHUNK_SIZE_OPTION},
-       {T("solid-chunk-size"),required_argument, NULL, IMAGEX_PACK_CHUNK_SIZE_OPTION},
-       {T("pack-compress"), required_argument, NULL, IMAGEX_PACK_COMPRESS_OPTION},
-       {T("solid-compress"),required_argument, NULL, IMAGEX_PACK_COMPRESS_OPTION},
+       {T("solid"),       no_argument,       NULL, IMAGEX_SOLID_OPTION},
+       {T("pack-streams"),no_argument,       NULL, IMAGEX_SOLID_OPTION},
+       {T("solid-compress"),required_argument, NULL, IMAGEX_SOLID_COMPRESS_OPTION},
+       {T("pack-compress"), required_argument, NULL, IMAGEX_SOLID_COMPRESS_OPTION},
+       {T("solid-chunk-size"),required_argument, NULL, IMAGEX_SOLID_CHUNK_SIZE_OPTION},
+       {T("pack-chunk-size"), required_argument, NULL, IMAGEX_SOLID_CHUNK_SIZE_OPTION},
         {T("ref"),         required_argument, NULL, IMAGEX_REF_OPTION},
         {T("threads"),     required_argument, NULL, IMAGEX_THREADS_OPTION},
         {T("rebuild"),     no_argument,       NULL, IMAGEX_REBUILD_OPTION},
@@ -345,14 +345,14 @@ static const struct option optimize_options[] = {
         {T("compress"),    required_argument, NULL, IMAGEX_COMPRESS_OPTION},
         {T("recompress"),  no_argument,       NULL, IMAGEX_RECOMPRESS_OPTION},
         {T("compress-slow"), no_argument,     NULL, IMAGEX_COMPRESS_SLOW_OPTION},
-       {T("recompress-slow"), no_argument,     NULL, IMAGEX_COMPRESS_SLOW_OPTION},
+       {T("recompress-slow"), no_argument,   NULL, IMAGEX_COMPRESS_SLOW_OPTION},
         {T("chunk-size"),  required_argument, NULL, IMAGEX_CHUNK_SIZE_OPTION},
-       {T("pack-chunk-size"), required_argument, NULL, IMAGEX_PACK_CHUNK_SIZE_OPTION},
-       {T("solid-chunk-size"),required_argument, NULL, IMAGEX_PACK_CHUNK_SIZE_OPTION},
-       {T("pack-compress"), required_argument, NULL, IMAGEX_PACK_COMPRESS_OPTION},
-       {T("solid-compress"),required_argument, NULL, IMAGEX_PACK_COMPRESS_OPTION},
-       {T("pack-streams"),no_argument,       NULL, IMAGEX_PACK_STREAMS_OPTION},
-       {T("solid"),       no_argument,       NULL, IMAGEX_PACK_STREAMS_OPTION},
+       {T("solid"),       no_argument,       NULL, IMAGEX_SOLID_OPTION},
+       {T("pack-streams"),no_argument,       NULL, IMAGEX_SOLID_OPTION},
+       {T("solid-compress"),required_argument, NULL, IMAGEX_SOLID_COMPRESS_OPTION},
+       {T("pack-compress"), required_argument, NULL, IMAGEX_SOLID_COMPRESS_OPTION},
+       {T("solid-chunk-size"),required_argument, NULL, IMAGEX_SOLID_CHUNK_SIZE_OPTION},
+       {T("pack-chunk-size"), required_argument, NULL, IMAGEX_SOLID_CHUNK_SIZE_OPTION},
         {T("threads"),     required_argument, NULL, IMAGEX_THREADS_OPTION},
         {T("pipable"),     no_argument,       NULL, IMAGEX_PIPABLE_OPTION},
         {T("not-pipable"), no_argument,       NULL, IMAGEX_NOT_PIPABLE_OPTION},
@@ -474,64 +474,74 @@ verify_image_exists_and_is_single(int image, const tchar *image_name,
         return ret;
  }
  
+/* Write the list of accepted compression types (and their aliases) to @fp.  */
+static void
+print_available_compression_types(FILE *fp)
+{
+       /* Adjacent literals are concatenated into one T() string.  */
+       tfputs(T(
+       "Available compression types:\n"
+       "\n"
+       "    none\n"
+       "    xpress (alias: \"fast\")\n"
+       "    lzx    (alias: \"maximum\") (default for capture)\n"
+       "    lzms   (alias: \"recovery\")\n"
+       "\n"
+       ), fp);
+}
+
  /* Parse the argument to --compress */
  static int
-get_compression_type(const tchar *optarg)
+get_compression_type(tchar *optarg)
  {
+       int ctype;
+       unsigned int compression_level = 0;
+       tchar *plevel;
+
+       plevel = tstrchr(optarg, T(':'));
+       if (plevel) {
+               tchar *ptmp;
+               unsigned long ultmp;
+
+               *plevel++ = T('\0');
+               ultmp = tstrtoul(plevel, &ptmp, 10);
+               if (ultmp >= UINT_MAX || ultmp == 0 || *ptmp || ptmp == plevel) {
+                       imagex_error(T("Compression level must be a positive integer! "
+                                      "e.g. --compress=lzx:80"));
+                       return WIMLIB_COMPRESSION_TYPE_INVALID;
+               }
+               compression_level = ultmp;
+       }
+
         if (!tstrcasecmp(optarg, T("maximum")) ||
             !tstrcasecmp(optarg, T("lzx")) ||
             !tstrcasecmp(optarg, T("max")))
-               return WIMLIB_COMPRESSION_TYPE_LZX;
+               ctype = WIMLIB_COMPRESSION_TYPE_LZX;
         else if (!tstrcasecmp(optarg, T("fast")) || !tstrcasecmp(optarg, T("xpress")))
-               return WIMLIB_COMPRESSION_TYPE_XPRESS;
+               ctype = WIMLIB_COMPRESSION_TYPE_XPRESS;
         else if (!tstrcasecmp(optarg, T("recovery")) || !tstrcasecmp(optarg, T("lzms")))
-               return WIMLIB_COMPRESSION_TYPE_LZMS;
+               ctype = WIMLIB_COMPRESSION_TYPE_LZMS;
         else if (!tstrcasecmp(optarg, T("none")))
-               return WIMLIB_COMPRESSION_TYPE_NONE;
+               ctype = WIMLIB_COMPRESSION_TYPE_NONE;
         else {
-               imagex_error(T("Invalid compression type \"%"TS"\"! Must be "
-                            "\"maximum\", \"fast\", or \"none\"."), optarg);
+               imagex_error(T("Invalid compression type \"%"TS"\"!"), optarg);
+               print_available_compression_types(stderr);
                 return WIMLIB_COMPRESSION_TYPE_INVALID;
         }
+
+       if (compression_level != 0)
+               wimlib_set_default_compression_level(ctype, compression_level);
+       return ctype;
  }
  
  static void
  set_compress_slow(void)
  {
-       static const struct wimlib_lzx_compressor_params lzx_slow_params = {
-               .hdr = {
-                       .size = sizeof(struct wimlib_lzx_compressor_params),
-               },
-               .algorithm = WIMLIB_LZX_ALGORITHM_SLOW,
-               .alg_params = {
-                       .slow = {
-                               .use_len2_matches = 1,
-                               .nice_match_length = 96,
-                               .num_optim_passes = 4,
-                               .max_search_depth = 100,
-                               .main_nostat_cost = 15,
-                               .len_nostat_cost = 15,
-                               .aligned_nostat_cost = 7,
-                       },
-               },
-       };
-
-       static const struct wimlib_lzms_compressor_params lzms_slow_params = {
-               .hdr = {
-                       .size = sizeof(struct wimlib_lzms_compressor_params),
-               },
-               .min_match_length = 2,
-               .max_match_length = UINT32_MAX,
-               .nice_match_length = 96,
-               .max_search_depth = 100,
-               .optim_array_length = 1024,
-       };
-
-       wimlib_set_default_compressor_params(WIMLIB_COMPRESSION_TYPE_LZX,
-                                            &lzx_slow_params.hdr);
-
-       wimlib_set_default_compressor_params(WIMLIB_COMPRESSION_TYPE_LZMS,
-                                            &lzms_slow_params.hdr);
+#if 0
+       fprintf(stderr, "WARNING: the '--compress-slow' option is deprecated.\n"
+                       "         Use the '--compress=TYPE:LEVEL' option instead.\n");
+#endif
+       wimlib_set_default_compression_level(-1, 100);
  }
  
  struct string_set {
@@ -1661,8 +1671,8 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
         int write_flags = 0;
         int compression_type = WIMLIB_COMPRESSION_TYPE_INVALID;
         uint32_t chunk_size = UINT32_MAX;
-       uint32_t pack_chunk_size = UINT32_MAX;
-       int pack_ctype = WIMLIB_COMPRESSION_TYPE_INVALID;
+       uint32_t solid_chunk_size = UINT32_MAX;
+       int solid_ctype = WIMLIB_COMPRESSION_TYPE_INVALID;
         const tchar *wimfile;
         int wim_fd;
         const tchar *name;
@@ -1693,7 +1703,6 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
         struct wimlib_capture_source *capture_sources;
         size_t num_sources;
         bool name_defaulted;
-       bool compress_slow = false;
  
         for_opt(c, capture_or_append_options) {
                 switch (c) {
@@ -1717,24 +1726,24 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
                                 goto out_err;
                         break;
                 case IMAGEX_COMPRESS_SLOW_OPTION:
-                       compress_slow = true;
+                       set_compress_slow();
                         break;
                 case IMAGEX_CHUNK_SIZE_OPTION:
                         chunk_size = parse_chunk_size(optarg);
                         if (chunk_size == UINT32_MAX)
                                 goto out_err;
                         break;
-               case IMAGEX_PACK_CHUNK_SIZE_OPTION:
-                       pack_chunk_size = parse_chunk_size(optarg);
-                       if (pack_chunk_size == UINT32_MAX)
+               case IMAGEX_SOLID_CHUNK_SIZE_OPTION:
+                       solid_chunk_size = parse_chunk_size(optarg);
+                       if (solid_chunk_size == UINT32_MAX)
                                 goto out_err;
                         break;
-               case IMAGEX_PACK_COMPRESS_OPTION:
-                       pack_ctype = get_compression_type(optarg);
-                       if (pack_ctype == WIMLIB_COMPRESSION_TYPE_INVALID)
+               case IMAGEX_SOLID_COMPRESS_OPTION:
+                       solid_ctype = get_compression_type(optarg);
+                       if (solid_ctype == WIMLIB_COMPRESSION_TYPE_INVALID)
                                 goto out_err;
                         break;
-               case IMAGEX_PACK_STREAMS_OPTION:
+               case IMAGEX_SOLID_OPTION:
                         write_flags |= WIMLIB_WRITE_FLAG_PACK_STREAMS;
                         break;
                 case IMAGEX_FLAGS_OPTION:
@@ -1834,29 +1843,15 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
                         /* With --wimboot, default to XPRESS compression.  */
                         compression_type = WIMLIB_COMPRESSION_TYPE_XPRESS;
                 } else if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) {
-                       /* With --pack-streams or --solid, default to LZMS
-                        * compression.  (However, this will not affect packed
-                        * resources!)  */
+                       /* With --solid, default to LZMS compression.  (However,
+                        * this will not affect solid blocks!)  */
                         compression_type = WIMLIB_COMPRESSION_TYPE_LZMS;
                 } else {
-                       /* Otherwise, default to LZX compression in fast mode.
-                        */
+                       /* Otherwise, default to LZX compression.  */
                         compression_type = WIMLIB_COMPRESSION_TYPE_LZX;
-                       if (!compress_slow && pack_ctype != WIMLIB_COMPRESSION_TYPE_LZX) {
-                               struct wimlib_lzx_compressor_params params = {
-                                       .hdr.size = sizeof(params),
-                                       .algorithm = WIMLIB_LZX_ALGORITHM_FAST,
-                                       .use_defaults = 1,
-                               };
-                               wimlib_set_default_compressor_params(WIMLIB_COMPRESSION_TYPE_LZX,
-                                                                    &params.hdr);
-                       }
                 }
         }
  
-       if (compress_slow)
-               set_compress_slow();
-
         if (!tstrcmp(wimfile, T("-"))) {
                 /* Writing captured WIM to standard output.  */
         #if 0
@@ -1985,13 +1980,13 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
                 if (ret)
                         goto out_free_wim;
         }
-       if (pack_ctype != WIMLIB_COMPRESSION_TYPE_INVALID) {
-               ret = wimlib_set_output_pack_compression_type(wim, pack_ctype);
+       if (solid_ctype != WIMLIB_COMPRESSION_TYPE_INVALID) {
+               ret = wimlib_set_output_pack_compression_type(wim, solid_ctype);
                 if (ret)
                         goto out_free_wim;
         }
-       if (pack_chunk_size != UINT32_MAX) {
-               ret = wimlib_set_output_pack_chunk_size(wim, pack_chunk_size);
+       if (solid_chunk_size != UINT32_MAX) {
+               ret = wimlib_set_output_pack_chunk_size(wim, solid_chunk_size);
                 if (ret)
                         goto out_free_wim;
         }
@@ -2593,8 +2588,8 @@ imagex_export(int argc, tchar **argv, int cmd)
         STRING_SET(refglobs);
         unsigned num_threads = 0;
         uint32_t chunk_size = UINT32_MAX;
-       uint32_t pack_chunk_size = UINT32_MAX;
-       int pack_ctype = WIMLIB_COMPRESSION_TYPE_INVALID;
+       uint32_t solid_chunk_size = UINT32_MAX;
+       int solid_ctype = WIMLIB_COMPRESSION_TYPE_INVALID;
  
         for_opt(c, export_options) {
                 switch (c) {
@@ -2613,14 +2608,14 @@ imagex_export(int argc, tchar **argv, int cmd)
                         if (compression_type == WIMLIB_COMPRESSION_TYPE_INVALID)
                                 goto out_err;
                         break;
-               case IMAGEX_RECOMPRESS_OPTION:
+               case IMAGEX_COMPRESS_SLOW_OPTION:
+                       set_compress_slow();
                         write_flags |= WIMLIB_WRITE_FLAG_RECOMPRESS;
                         break;
-               case IMAGEX_COMPRESS_SLOW_OPTION:
+               case IMAGEX_RECOMPRESS_OPTION:
                         write_flags |= WIMLIB_WRITE_FLAG_RECOMPRESS;
-                       set_compress_slow();
                         break;
-               case IMAGEX_PACK_STREAMS_OPTION:
+               case IMAGEX_SOLID_OPTION:
                         write_flags |= WIMLIB_WRITE_FLAG_PACK_STREAMS;
                         break;
                 case IMAGEX_CHUNK_SIZE_OPTION:
@@ -2628,14 +2623,14 @@ imagex_export(int argc, tchar **argv, int cmd)
                         if (chunk_size == UINT32_MAX)
                                 goto out_err;
                         break;
-               case IMAGEX_PACK_CHUNK_SIZE_OPTION:
-                       pack_chunk_size = parse_chunk_size(optarg);
-                       if (pack_chunk_size == UINT32_MAX)
+               case IMAGEX_SOLID_CHUNK_SIZE_OPTION:
+                       solid_chunk_size = parse_chunk_size(optarg);
+                       if (solid_chunk_size == UINT32_MAX)
                                 goto out_err;
                         break;
-               case IMAGEX_PACK_COMPRESS_OPTION:
-                       pack_ctype = get_compression_type(optarg);
-                       if (pack_ctype == WIMLIB_COMPRESSION_TYPE_INVALID)
+               case IMAGEX_SOLID_COMPRESS_OPTION:
+                       solid_ctype = get_compression_type(optarg);
+                       if (solid_ctype == WIMLIB_COMPRESSION_TYPE_INVALID)
                                 goto out_err;
                         break;
                 case IMAGEX_REF_OPTION:
@@ -2753,8 +2748,8 @@ imagex_export(int argc, tchar **argv, int cmd)
  
                 if (compression_type == WIMLIB_COMPRESSION_TYPE_INVALID) {
                         /* The user did not specify a compression type; default
-                        * to that of the source WIM, unless --pack-streams,
-                        * --solid, or --wimboot was specified.   */
+                        * to that of the source WIM, unless --solid or
+                        * --wimboot was specified.   */
  
                         if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS)
                                 compression_type = WIMLIB_COMPRESSION_TYPE_LZMS;
@@ -2789,13 +2784,13 @@ imagex_export(int argc, tchar **argv, int cmd)
                 if (ret)
                         goto out_free_dest_wim;
         }
-       if (pack_ctype != WIMLIB_COMPRESSION_TYPE_INVALID) {
-               ret = wimlib_set_output_pack_compression_type(dest_wim, pack_ctype);
+       if (solid_ctype != WIMLIB_COMPRESSION_TYPE_INVALID) {
+               ret = wimlib_set_output_pack_compression_type(dest_wim, solid_ctype);
                 if (ret)
                         goto out_free_dest_wim;
         }
-       if (pack_chunk_size != UINT32_MAX) {
-               ret = wimlib_set_output_pack_chunk_size(dest_wim, pack_chunk_size);
+       if (solid_chunk_size != UINT32_MAX) {
+               ret = wimlib_set_output_pack_chunk_size(dest_wim, solid_chunk_size);
                 if (ret)
                         goto out_free_dest_wim;
         }
@@ -3478,8 +3473,8 @@ imagex_optimize(int argc, tchar **argv, int cmd)
         int write_flags = WIMLIB_WRITE_FLAG_REBUILD;
         int compression_type = WIMLIB_COMPRESSION_TYPE_INVALID;
         uint32_t chunk_size = UINT32_MAX;
-       uint32_t pack_chunk_size = UINT32_MAX;
-       int pack_ctype = WIMLIB_COMPRESSION_TYPE_INVALID;
+       uint32_t solid_chunk_size = UINT32_MAX;
+       int solid_ctype = WIMLIB_COMPRESSION_TYPE_INVALID;
         int ret;
         WIMStruct *wim;
         const tchar *wimfile;
@@ -3502,29 +3497,29 @@ imagex_optimize(int argc, tchar **argv, int cmd)
                         if (compression_type == WIMLIB_COMPRESSION_TYPE_INVALID)
                                 goto out_err;
                         break;
-               case IMAGEX_RECOMPRESS_OPTION:
+               case IMAGEX_COMPRESS_SLOW_OPTION:
+                       set_compress_slow();
                         write_flags |= WIMLIB_WRITE_FLAG_RECOMPRESS;
                         break;
-               case IMAGEX_COMPRESS_SLOW_OPTION:
+               case IMAGEX_RECOMPRESS_OPTION:
                         write_flags |= WIMLIB_WRITE_FLAG_RECOMPRESS;
-                       set_compress_slow();
                         break;
                 case IMAGEX_CHUNK_SIZE_OPTION:
                         chunk_size = parse_chunk_size(optarg);
                         if (chunk_size == UINT32_MAX)
                                 goto out_err;
                         break;
-               case IMAGEX_PACK_CHUNK_SIZE_OPTION:
-                       pack_chunk_size = parse_chunk_size(optarg);
-                       if (pack_chunk_size == UINT32_MAX)
+               case IMAGEX_SOLID_CHUNK_SIZE_OPTION:
+                       solid_chunk_size = parse_chunk_size(optarg);
+                       if (solid_chunk_size == UINT32_MAX)
                                 goto out_err;
                         break;
-               case IMAGEX_PACK_COMPRESS_OPTION:
-                       pack_ctype = get_compression_type(optarg);
-                       if (pack_ctype == WIMLIB_COMPRESSION_TYPE_INVALID)
+               case IMAGEX_SOLID_COMPRESS_OPTION:
+                       solid_ctype = get_compression_type(optarg);
+                       if (solid_ctype == WIMLIB_COMPRESSION_TYPE_INVALID)
                                 goto out_err;
                         break;
-               case IMAGEX_PACK_STREAMS_OPTION:
+               case IMAGEX_SOLID_OPTION:
                         write_flags |= WIMLIB_WRITE_FLAG_PACK_STREAMS;
                         write_flags |= WIMLIB_WRITE_FLAG_RECOMPRESS;
                         break;
@@ -3569,13 +3564,13 @@ imagex_optimize(int argc, tchar **argv, int cmd)
                 if (ret)
                         goto out_wimlib_free;
         }
-       if (pack_ctype != WIMLIB_COMPRESSION_TYPE_INVALID) {
-               ret = wimlib_set_output_pack_compression_type(wim, pack_ctype);
+       if (solid_ctype != WIMLIB_COMPRESSION_TYPE_INVALID) {
+               ret = wimlib_set_output_pack_compression_type(wim, solid_ctype);
                 if (ret)
                         goto out_wimlib_free;
         }
-       if (pack_chunk_size != UINT32_MAX) {
-               ret = wimlib_set_output_pack_chunk_size(wim, pack_chunk_size);
+       if (solid_chunk_size != UINT32_MAX) {
+               ret = wimlib_set_output_pack_chunk_size(wim, solid_chunk_size);
                 if (ret)
                         goto out_wimlib_free;
         }
@@ -3993,54 +3988,68 @@ static const struct imagex_command imagex_commands[] = {
         [CMD_UPDATE]   = {T("update"),   imagex_update},
  };
  
+#ifdef __WIN32__
+
+   /* Can be a directory or source list file.  But source list file is probably
+    * a rare use case, so just say directory.  */
+#  define SOURCE_STR T("DIRECTORY")
+
+   /* Can only be a directory  */
+#  define TARGET_STR T("DIRECTORY")
+
+#else
+   /* Can be a directory, NTFS volume, or source list file. */
+#  define SOURCE_STR T("SOURCE")
+
+   /* Can be a directory or NTFS volume.  */
+#  define TARGET_STR T("TARGET")
+
+#endif
+
  static const tchar *usage_strings[] = {
  [CMD_APPEND] =
  T(
-"    %"TS" (DIRECTORY | NTFS_VOLUME) WIMFILE\n"
-"                    [IMAGE_NAME [IMAGE_DESCRIPTION]] [--boot] [--check]\n"
-"                    [--nocheck] [--flags EDITION_ID] [--dereference]\n"
-"                    [--config=FILE] [--threads=NUM_THREADS] [--source-list]\n"
-"                    [--no-acls] [--strict-acls] [--rpfix] [--norpfix]\n"
-"                    [--update-of=[WIMFILE:]IMAGE] [--wimboot]\n"
+"    %"TS" " SOURCE_STR " WIMFILE [IMAGE_NAME [IMAGE_DESC]]\n"
+"                    [--boot] [--check] [--nocheck] [--config=FILE]\n"
+"                    [--threads=NUM_THREADS] [--no-acls] [--strict-acls]\n"
+"                    [--rpfix] [--norpfix] [--update-of=[WIMFILE:]IMAGE]\n"
+"                    [--wimboot] [--unix-data] [--dereference]\n"
  ),
  [CMD_APPLY] =
  T(
-"    %"TS" WIMFILE [(IMAGE_NUM | IMAGE_NAME | all)]\n"
-"                    (DIRECTORY | NTFS_VOLUME) [--check] [--ref=\"GLOB\"]\n"
-"                    [--no-acls] [--strict-acls] [--no-attributes]\n"
-"                    [--rpfix] [--norpfix] [--include-invalid-names]\n"
-"                    [--wimboot] [--unix-data]\n"
+"    %"TS" WIMFILE [IMAGE] " TARGET_STR "\n"
+"                    [--check] [--ref=\"GLOB\"] [--no-acls] [--strict-acls]\n"
+"                    [--no-attributes] [--rpfix] [--norpfix]\n"
+"                    [--include-invalid-names] [--wimboot] [--unix-data]\n"
  ),
  [CMD_CAPTURE] =
  T(
-"    %"TS" (DIRECTORY | NTFS_VOLUME) WIMFILE\n"
-"                   [IMAGE_NAME [IMAGE_DESCRIPTION]] [--boot] [--check]\n"
-"                    [--nocheck] [--compress=TYPE] [--flags EDITION_ID]\n"
-"                    [--dereference] [--config=FILE] [--threads=NUM_THREADS]\n"
-"                    [--source-list] [--no-acls] [--strict-acls] [--rpfix]\n"
-"                    [--norpfix] [--update-of=[WIMFILE:]IMAGE]\n"
-"                    [--delta-from=WIMFILE] [--wimboot] [--unix-data]\n"
+"    %"TS" " SOURCE_STR " WIMFILE [IMAGE_NAME [IMAGE_DESC]]\n"
+"                    [--compress=TYPE] [--boot] [--check] [--nocheck]\n"
+"                    [--config=FILE] [--threads=NUM_THREADS]\n"
+"                    [--no-acls] [--strict-acls] [--rpfix] [--norpfix]\n"
+"                    [--update-of=[WIMFILE:]IMAGE] [--delta-from=WIMFILE]\n"
+"                    [--wimboot] [--unix-data] [--dereference] [--solid]\n"
  ),
  [CMD_DELETE] =
  T(
-"    %"TS" WIMFILE (IMAGE_NUM | IMAGE_NAME | all)\n"
-"                    [--check] [--soft]\n"
+"    %"TS" WIMFILE IMAGE [--check] [--soft]\n"
  ),
  [CMD_DIR] =
  T(
-"    %"TS" WIMFILE (IMAGE_NUM | IMAGE_NAME | all) [--path=PATH] [--detailed]\n"
+"    %"TS" WIMFILE IMAGE [--path=PATH] [--detailed]\n"
  ),
  [CMD_EXPORT] =
  T(
-"    %"TS" SRC_WIMFILE (SRC_IMAGE_NUM | SRC_IMAGE_NAME | all ) \n"
-"                    DEST_WIMFILE [DEST_IMAGE_NAME [DEST_IMAGE_DESCRIPTION]]\n"
+"    %"TS" SRC_WIMFILE SRC_IMAGE DEST_WIMFILE\n"
+"                        [DEST_IMAGE_NAME [DEST_IMAGE_DESC]]\n"
  "                    [--boot] [--check] [--nocheck] [--compress=TYPE]\n"
  "                    [--ref=\"GLOB\"] [--threads=NUM_THREADS] [--rebuild]\n"
  "                    [--wimboot]\n"
  ),
  [CMD_EXTRACT] =
  T(
-"    %"TS" WIMFILE (IMAGE_NUM | IMAGE_NAME) [(PATH | @LISTFILE)...]\n"
+"    %"TS" WIMFILE IMAGE [(PATH | @LISTFILE)...]\n"
  "                    [--check] [--ref=\"GLOB\"] [--dest-dir=CMD_DIR]\n"
  "                    [--to-stdout] [--no-acls] [--strict-acls]\n"
  "                    [--no-attributes] [--include-invalid-names]\n"
@@ -4048,8 +4057,8 @@ T(
  ),
  [CMD_INFO] =
  T(
-"    %"TS" WIMFILE [(IMAGE_NUM | IMAGE_NAME) [NEW_NAME\n"
-"                    [NEW_DESC]]] [--boot] [--check] [--nocheck] [--xml]\n"
+"    %"TS" WIMFILE [IMAGE [NEW_NAME [NEW_DESC]]]\n"
+"                    [--boot] [--check] [--nocheck] [--xml]\n"
  "                    [--extract-xml FILE] [--header] [--lookup-table]\n"
  ),
  [CMD_JOIN] =
@@ -4059,22 +4068,23 @@ T(
  #if WIM_MOUNTING_SUPPORTED
  [CMD_MOUNT] =
  T(
-"    %"TS" WIMFILE [(IMAGE_NUM | IMAGE_NAME)] DIRECTORY\n"
+"    %"TS" WIMFILE [IMAGE] DIRECTORY\n"
  "                    [--check] [--streams-interface=INTERFACE]\n"
  "                    [--ref=\"GLOB\"] [--allow-other] [--unix-data]\n"
  ),
  [CMD_MOUNTRW] =
  T(
-"    %"TS" WIMFILE [(IMAGE_NUM | IMAGE_NAME)] DIRECTORY\n"
+"    %"TS" WIMFILE [IMAGE] DIRECTORY\n"
  "                    [--check] [--streams-interface=INTERFACE]\n"
  "                    [--staging-dir=CMD_DIR] [--allow-other] [--unix-data]\n"
  ),
  #endif
  [CMD_OPTIMIZE] =
  T(
-"    %"TS" WIMFILE [--check] [--nocheck] [--recompress]\n"
-"                    [--recompress-slow] [--compress=TYPE]\n"
-"                    [--threads=NUM_THREADS]\n"
+"    %"TS" WIMFILE\n"
+"                    [--recompress] [--compress=TYPE]\n"
+"                    [--threads=NUM_THREADS] [--check] [--nocheck]\n"
+"\n"
  ),
  [CMD_SPLIT] =
  T(
@@ -4083,16 +4093,17 @@ T(
  #if WIM_MOUNTING_SUPPORTED
  [CMD_UNMOUNT] =
  T(
-"    %"TS" DIRECTORY [--commit] [--force] [--new-image]\n"
-"                         [--check] [--rebuild]\n"
+"    %"TS" DIRECTORY\n"
+"                    [--commit] [--force] [--new-image] [--check] [--rebuild]\n"
  ),
  #endif
  [CMD_UPDATE] =
  T(
-"    %"TS" WIMFILE [IMAGE_NUM | IMAGE_NAME] [--check] [--rebuild]\n"
-"                    [--threads=NUM_THREADS] [DEFAULT_ADD_OPTIONS]\n"
-"                    [DEFAULT_DELETE_OPTIONS] [--command=STRING]\n"
-"                    [--wimboot-config=FILE| [< CMDFILE]\n"
+"    %"TS" WIMFILE [IMAGE]\n"
+"                    [--check] [--rebuild] [--threads=NUM_THREADS]\n"
+"                    [DEFAULT_ADD_OPTIONS] [DEFAULT_DELETE_OPTIONS]\n"
+"                    [--command=STRING] [--wimboot-config=FILE]\n"
+"                    [< CMDFILE]\n"
  ),
  };
  
@@ -4170,10 +4181,10 @@ recommend_man_page(int cmd, FILE *fp)
  {
         const tchar *format_str;
  #ifdef __WIN32__
-       format_str = T("Uncommon options are not listed;\n"
+       format_str = T("Some uncommon options are not listed;\n"
                        "See %"TS".pdf in the doc directory for more details.\n");
  #else
-       format_str = T("Uncommon options are not listed;\n"
+       format_str = T("Some uncommon options are not listed;\n"
                        "Try `man %"TS"' for more details.\n");
  #endif
         tfprintf(fp, format_str, get_cmd_string(cmd, true));
@@ -4200,11 +4211,14 @@ usage_all(FILE *fp)
         T(
  "    %"TS" --help\n"
  "    %"TS" --version\n"
-"\n"
-"    The compression TYPE may be \"maximum\", \"fast\", or \"none\".\n"
  "\n"
         );
         tfprintf(fp, extra, invocation_name, invocation_name);
+       tfprintf(fp,
+                T("IMAGE can be the 1-based index or name of an image in the WIM file.\n"
+                  "For some commands IMAGE is optional if the WIM file only contains one image.\n"
+                  "For some commands IMAGE may be \"all\".\n"
+                  "\n"));
         recommend_man_page(CMD_NONE, fp);
  }
  
diff --git a/src/compress.c b/src/compress.c

index 039836af5c8f19d62b954c5e79ea537762bca109..bea8ea5a2c79c500437e5419b32e517e83fef539 100644 (file)
--- a/src/compress.c
+++ b/src/compress.c
@@ -6,7 +6,7 @@
   */
  
  /*
- * Copyright (C) 2013 Eric Biggers
+ * Copyright (C) 2013, 2014 Eric Biggers
   *
   * This file is part of wimlib, a library for working with WIM files.
   *
@@ -29,12 +29,19 @@
  #endif
  
  #include "wimlib.h"
+#include "wimlib/assert.h"
+#include "wimlib/error.h"
  #include "wimlib/compressor_ops.h"
  #include "wimlib/util.h"
  
+#include <stdlib.h>
+#include <string.h>
+
  struct wimlib_compressor {
         const struct compressor_ops *ops; /* operations table, taken from compressor_ops[ctype] at creation */
         void *private; /* NULL, or the state filled in by ops->create_compressor() */
+       enum wimlib_compression_type ctype; /* compression type this compressor was created for */
+       size_t max_block_size; /* creation-time block size; wimlib_compress() asserts that inputs do not exceed it */
  };
  
  static const struct compressor_ops *compressor_ops[] = {
@@ -43,9 +50,11 @@ static const struct compressor_ops *compressor_ops[] = {
         [WIMLIB_COMPRESSION_TYPE_LZMS]   = &lzms_compressor_ops,
  };
  
-static struct wimlib_compressor_params_header *
-compressor_default_params[ARRAY_LEN(compressor_ops)] = {
-};
+/* Scale: 10 = low, 50 = medium, 100 = high */
+
+#define DEFAULT_COMPRESSION_LEVEL 50
+
+static unsigned int default_compression_levels[ARRAY_LEN(compressor_ops)];
  
  static bool
  compressor_ctype_valid(int ctype)
@@ -56,47 +65,27 @@ compressor_ctype_valid(int ctype)
  }
  
  WIMLIBAPI int
-wimlib_set_default_compressor_params(enum wimlib_compression_type ctype,
-                                    const struct wimlib_compressor_params_header *params)
+wimlib_set_default_compression_level(enum wimlib_compression_type ctype,
+                                    unsigned int compression_level)
  {
-       struct wimlib_compressor_params_header *dup;
-
-       if (!compressor_ctype_valid(ctype))
-               return WIMLIB_ERR_INVALID_COMPRESSION_TYPE;
-
-       if (params != NULL &&
-           compressor_ops[ctype]->params_valid != NULL &&
-           !compressor_ops[ctype]->params_valid(params))
-               return WIMLIB_ERR_INVALID_PARAM;
+       if ((int)ctype == -1) { /* -1 selects every compression type at once */
+               for (int i = 0; i < ARRAY_LEN(default_compression_levels); i++)
+                       default_compression_levels[i] = compression_level;
+       } else {
+               if (!compressor_ctype_valid(ctype))
+                       return WIMLIB_ERR_INVALID_COMPRESSION_TYPE;
  
-       dup = NULL;
-       if (params) {
-               dup = memdup(params, params->size);
-               if (dup == NULL)
-                       return WIMLIB_ERR_NOMEM;
+               default_compression_levels[ctype] = compression_level; /* a stored 0 is later replaced by DEFAULT_COMPRESSION_LEVEL */
         }
-
-       FREE(compressor_default_params[ctype]);
-       compressor_default_params[ctype] = dup;
         return 0;
  }
  
-void
-cleanup_compressor_params(void)
-{
-       for (size_t i = 0; i < ARRAY_LEN(compressor_default_params); i++) {
-               FREE(compressor_default_params[i]);
-               compressor_default_params[i] = NULL;
-       }
-}
-
  WIMLIBAPI u64
  wimlib_get_compressor_needed_memory(enum wimlib_compression_type ctype,
                                     size_t max_block_size,
-                                   const struct wimlib_compressor_params_header *extra_params)
+                                   unsigned int compression_level)
  {
         const struct compressor_ops *ops;
-       const struct wimlib_compressor_params_header *params;
         u64 size;
  
         if (!compressor_ctype_valid(ctype))
@@ -104,25 +93,21 @@ wimlib_get_compressor_needed_memory(enum wimlib_compression_type ctype,
  
         ops = compressor_ops[ctype];
  
-       if (extra_params) {
-               params = extra_params;
-               if (ops->params_valid && !ops->params_valid(params))
-                       return 0;
-       } else {
-               params = compressor_default_params[ctype];
-       }
+       if (compression_level == 0)
+               compression_level = default_compression_levels[ctype];
+       if (compression_level == 0)
+               compression_level = DEFAULT_COMPRESSION_LEVEL;
  
         size = sizeof(struct wimlib_compressor);
         if (ops->get_needed_memory)
-               size += ops->get_needed_memory(max_block_size, params);
+               size += ops->get_needed_memory(max_block_size, compression_level);
         return size;
  }
  
-
  WIMLIBAPI int
  wimlib_create_compressor(enum wimlib_compression_type ctype,
                          size_t max_block_size,
-                        const struct wimlib_compressor_params_header *extra_params,
+                        unsigned int compression_level,
                          struct wimlib_compressor **c_ret)
  {
         struct wimlib_compressor *c;
@@ -130,6 +115,9 @@ wimlib_create_compressor(enum wimlib_compression_type ctype,
         if (c_ret == NULL)
                 return WIMLIB_ERR_INVALID_PARAM;
  
+       if (max_block_size == 0)
+               return WIMLIB_ERR_INVALID_PARAM;
+
         if (!compressor_ctype_valid(ctype))
                 return WIMLIB_ERR_INVALID_COMPRESSION_TYPE;
  
@@ -138,21 +126,19 @@ wimlib_create_compressor(enum wimlib_compression_type ctype,
                 return WIMLIB_ERR_NOMEM;
         c->ops = compressor_ops[ctype];
         c->private = NULL;
+       c->ctype = ctype;
+       c->max_block_size = max_block_size;
         if (c->ops->create_compressor) {
-               const struct wimlib_compressor_params_header *params;
                 int ret;
  
-               if (extra_params) {
-                       params = extra_params;
-                       if (c->ops->params_valid && !c->ops->params_valid(params)) {
-                               FREE(c);
-                               return WIMLIB_ERR_INVALID_PARAM;
-                       }
-               } else {
-                       params = compressor_default_params[ctype];
-               }
+               if (compression_level == 0)
+                       compression_level = default_compression_levels[ctype];
+               if (compression_level == 0)
+                       compression_level = DEFAULT_COMPRESSION_LEVEL;
+
                 ret = c->ops->create_compressor(max_block_size,
-                                               params, &c->private);
+                                               compression_level,
+                                               &c->private);
                 if (ret) {
                         FREE(c);
                         return ret;
@@ -167,9 +153,65 @@ wimlib_compress(const void *uncompressed_data, size_t uncompressed_size,
                 void *compressed_data, size_t compressed_size_avail,
                 struct wimlib_compressor *c)
  {
-       return c->ops->compress(uncompressed_data, uncompressed_size,
-                               compressed_data, compressed_size_avail,
-                               c->private);
+       size_t compressed_size;
+
+       wimlib_assert(uncompressed_size <= c->max_block_size);
+
+       compressed_size = c->ops->compress(uncompressed_data,
+                                          uncompressed_size,
+                                          compressed_data,
+                                          compressed_size_avail,
+                                          c->private);
+
+       /* (Optional) Verify that we really get the same thing back when
+        * decompressing.  Should always be the case, unless there's a bug.  */
+#ifdef ENABLE_VERIFY_COMPRESSION
+       if (compressed_size != 0) {
+               struct wimlib_decompressor *d;
+               int res;
+               u8 *buf;
+
+               buf = MALLOC(uncompressed_size);
+               if (!buf) {
+                       WARNING("Unable to verify results of %s compression "
+                               "(can't allocate buffer)",
+                               wimlib_get_compression_type_string(c->ctype));
+                       return 0;
+               }
+
+               res = wimlib_create_decompressor(c->ctype,
+                                                c->max_block_size, &d);
+               if (res) {
+                       WARNING("Unable to verify results of %s compression "
+                               "(can't create decompressor)",
+                               wimlib_get_compression_type_string(c->ctype));
+                       FREE(buf);
+                       return 0;
+               }
+
+               res = wimlib_decompress(compressed_data, compressed_size,
+                                       buf, uncompressed_size, d);
+               wimlib_free_decompressor(d);
+               if (res) {
+                       ERROR("Failed to decompress our %s-compressed data",
+                             wimlib_get_compression_type_string(c->ctype));
+                       FREE(buf);
+                       abort();
+               }
+
+               res = memcmp(uncompressed_data, buf, uncompressed_size);
+               FREE(buf);
+
+               if (res) {
+                       ERROR("Our %s-compressed data did not decompress "
+                             "to original",
+                             wimlib_get_compression_type_string(c->ctype));
+                       abort();
+               }
+       }
+#endif /* ENABLE_VERIFY_COMPRESSION */
+
+       return compressed_size;
  }
  
  WIMLIBAPI void
diff --git a/src/compress_parallel.c b/src/compress_parallel.c

index e083c079ab614a26e83edabdefbcf6fa0e06b102..e624819be77bb432a6c685ece3e2e039537c56b7 100644 (file)
--- a/src/compress_parallel.c
+++ b/src/compress_parallel.c
@@ -451,7 +451,7 @@ new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
                         + 1000000
                         + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
                                                                             out_chunk_size,
-                                                                           NULL);
+                                                                           0);
                 if (approx_mem_required <= max_memory)
                         break;
  
@@ -510,8 +510,8 @@ new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
  
                 dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                 dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
-               ret = wimlib_create_compressor(out_ctype, out_chunk_size,
-                                              NULL, &dat->compressor);
+               ret = wimlib_create_compressor(out_ctype, out_chunk_size, 0,
+                                              &dat->compressor);
                 if (ret)
                         goto err;
         }
diff --git a/src/compress_serial.c b/src/compress_serial.c

index 1a333fe0fb07580cead9eaa3e80a495e2eb7b3a1..01187ad383f2f2a8c4ad7f6bc54aca5ea4a158fd 100644 (file)
--- a/src/compress_serial.c
+++ b/src/compress_serial.c
@@ -121,7 +121,7 @@ new_serial_chunk_compressor(int out_ctype, u32 out_chunk_size,
         ctx->base.get_chunk = serial_chunk_compressor_get_chunk;
  
         ret = wimlib_create_compressor(out_ctype, out_chunk_size,
-                                      NULL, &ctx->compressor);
+                                      0, &ctx->compressor);
         if (ret)
                 goto err;
  
diff --git a/src/decompress.c b/src/decompress.c

index c9965a0ffeaf22aad2a8f373272c7b03e5b2d371..3e110e84d7a9bfea65de133d3e0deffe9cc53ae3 100644 (file)
--- a/src/decompress.c
+++ b/src/decompress.c
@@ -6,7 +6,7 @@
   */
  
  /*
- * Copyright (C) 2013 Eric Biggers
+ * Copyright (C) 2013, 2014 Eric Biggers
   *
   * This file is part of wimlib, a library for working with WIM files.
   *
@@ -43,10 +43,6 @@ static const struct decompressor_ops *decompressor_ops[] = {
         [WIMLIB_COMPRESSION_TYPE_LZMS]   = &lzms_decompressor_ops,
  };
  
-static struct wimlib_decompressor_params_header *
-decompressor_default_params[ARRAY_LEN(decompressor_ops)] = {
-};
-
  static bool
  decompressor_ctype_valid(int ctype)
  {
@@ -55,40 +51,9 @@ decompressor_ctype_valid(int ctype)
                 decompressor_ops[ctype] != NULL);
  }
  
-WIMLIBAPI int
-wimlib_set_default_decompressor_params(enum wimlib_compression_type ctype,
-                                      const struct wimlib_decompressor_params_header *params)
-{
-       struct wimlib_decompressor_params_header *dup;
-
-       if (!decompressor_ctype_valid(ctype))
-               return WIMLIB_ERR_INVALID_COMPRESSION_TYPE;
-
-       dup = NULL;
-       if (params) {
-               dup = memdup(params, params->size);
-               if (dup == NULL)
-                       return WIMLIB_ERR_NOMEM;
-       }
-
-       FREE(decompressor_default_params[ctype]);
-       decompressor_default_params[ctype] = dup;
-       return 0;
-}
-
-void
-cleanup_decompressor_params(void)
-{
-       for (size_t i = 0; i < ARRAY_LEN(decompressor_default_params); i++) {
-               FREE(decompressor_default_params[i]);
-               decompressor_default_params[i] = NULL;
-       }
-}
-
  WIMLIBAPI int
  wimlib_create_decompressor(enum wimlib_compression_type ctype,
                            size_t max_block_size,
-                          const struct wimlib_decompressor_params_header *extra_params,
                            struct wimlib_decompressor **dec_ret)
  {
         struct wimlib_decompressor *dec;
@@ -105,15 +70,9 @@ wimlib_create_decompressor(enum wimlib_compression_type ctype,
         dec->ops = decompressor_ops[ctype];
         dec->private = NULL;
         if (dec->ops->create_decompressor) {
-               const struct wimlib_decompressor_params_header *params;
                 int ret;
  
-               if (extra_params)
-                       params = extra_params;
-               else
-                       params = decompressor_default_params[ctype];
                 ret = dec->ops->create_decompressor(max_block_size,
-                                                   params,
                                                     &dec->private);
                 if (ret) {
                         FREE(dec);
diff --git a/src/divsufsort.c b/src/divsufsort.c

new file mode 100644 (file)

index 0000000..fe2a8eb
--- /dev/null
+++ b/src/divsufsort.c
@@ -0,0 +1,1604 @@
+/*
+ * divsufsort.c for libdivsufsort-lite
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "wimlib/divsufsort.h"
+#include "wimlib/lz_mf.h"
+#include "wimlib/util.h"
+
+/*- Constants -*/
+#define ALPHABET_SIZE 256 /* number of distinct symbol values (the text is an array of unsigned char) */
+#define BUCKET_A_SIZE (ALPHABET_SIZE) /* one slot per symbol */
+#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) /* one slot per ordered pair of symbols */
+
+#define SS_INSERTIONSORT_THRESHOLD 8 /* ss_mintrosort() hands ranges of at most this many entries to ss_insertionsort() */
+
+#define SS_BLOCKSIZE 1024 /* ss_isqrt() caps its result at this value; 0 selects the 32-bit ss_ilg() variant */
+
+/* Capacity of the explicit stack used by ss_mintrosort():
+ * minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
+#if SS_BLOCKSIZE == 0
+# define SS_MISORT_STACKSIZE (96)
+#elif SS_BLOCKSIZE <= 4096
+# define SS_MISORT_STACKSIZE (16)
+#else
+# define SS_MISORT_STACKSIZE (24)
+#endif
+#define SS_SMERGE_STACKSIZE (32)
+#define TR_INSERTIONSORT_THRESHOLD (8)
+#define TR_STACKSIZE (64)
+
+
+/*- Macros -
+ *
+ * SWAP, MIN and MAX are aliases for the lower-case helpers swap, min and max.
+ * Those helpers are not defined in this file; presumably they come from the
+ * wimlib headers included above (to be confirmed).
+ *
+ * STACK_PUSH, STACK_PUSH5, STACK_POP and STACK_POP5 implement an explicit
+ * recursion stack.  They rely on names from the enclosing function: an array
+ * `stack` of structs with members a, b, c, d (plus e for the 5-argument
+ * forms), an integer `ssize` counting the used slots, and a STACK_SIZE macro
+ * giving the capacity (checked with LZ_ASSERT only).  STACK_POP and STACK_POP5
+ * execute `return;` in the enclosing function when the stack is empty, so
+ * they may only be used inside functions returning void.
+ *
+ * BUCKET_A, BUCKET_B and BUCKET_BSTAR index the arrays bucket_A and bucket_B
+ * of the enclosing scope.  BUCKET_B and BUCKET_BSTAR share the bucket_B array
+ * but swap the roles of the two symbols when forming the index.
+ */
+#define SWAP swap
+#define MIN min
+#define MAX max
+
+#define STACK_PUSH(_a, _b, _c, _d)\
+  do {\
+    LZ_ASSERT(ssize < STACK_SIZE);\
+    stack[ssize].a = (_a), stack[ssize].b = (_b),\
+    stack[ssize].c = (_c), stack[ssize++].d = (_d);\
+  } while(0)
+#define STACK_PUSH5(_a, _b, _c, _d, _e)\
+  do {\
+    LZ_ASSERT(ssize < STACK_SIZE);\
+    stack[ssize].a = (_a), stack[ssize].b = (_b),\
+    stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
+  } while(0)
+#define STACK_POP(_a, _b, _c, _d)\
+  do {\
+    LZ_ASSERT(0 <= ssize);\
+    if(ssize == 0) { return; }\
+    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+    (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
+  } while(0)
+#define STACK_POP5(_a, _b, _c, _d, _e)\
+  do {\
+    LZ_ASSERT(0 <= ssize);\
+    if(ssize == 0) { return; }\
+    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+    (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
+  } while(0)
+#define BUCKET_A(_c0) bucket_A[(_c0)]
+#if ALPHABET_SIZE == 256
+#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
+#else
+#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
+#endif
+
+
+/*- Private Functions -*/
+/*
+ * lg_table[i] holds floor(log2(i)) for every byte value i = 1..255; entry 0
+ * holds -1.  ss_ilg() and ss_isqrt() combine lookups in this table, one byte
+ * at a time, to find the highest set bit of wider integers.
+ */
+static const int lg_table[256]= {
+ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+
+#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
+/*
+ * Integer base-2 logarithm: returns floor(log2(n)) computed from lg_table,
+ * or -1 when the examined bits of n are all zero.
+ *
+ * The implementation is selected at compile time from SS_BLOCKSIZE:
+ *   - SS_BLOCKSIZE == 0:  all 32 bits of n are examined, one byte at a time;
+ *   - SS_BLOCKSIZE < 256: n is used directly as the table index, so it must
+ *     lie in 0..255;
+ *   - otherwise:          only the low 16 bits of n are examined.
+ */
+static inline
+int
+ss_ilg(int n) {
+#if SS_BLOCKSIZE == 0
+  return (n & 0xffff0000) ?
+          ((n & 0xff000000) ?
+            24 + lg_table[(n >> 24) & 0xff] :
+            16 + lg_table[(n >> 16) & 0xff]) :
+          ((n & 0x0000ff00) ?
+             8 + lg_table[(n >>  8) & 0xff] :
+             0 + lg_table[(n >>  0) & 0xff]);
+#elif SS_BLOCKSIZE < 256
+  return lg_table[n];
+#else
+  return (n & 0xff00) ?
+          8 + lg_table[(n >> 8) & 0xff] :
+          0 + lg_table[(n >> 0) & 0xff];
+#endif
+}
+
+#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
+
+#if SS_BLOCKSIZE != 0
+/*
+ * Square-root lookup table used by ss_isqrt().  The entries are consistent
+ * with sqq_table[i] == floor(16 * sqrt(i)); for example 1 -> 16, 4 -> 32,
+ * 64 -> 128 and 255 -> 255.
+ */
+static const int sqq_table[256] = {
+  0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
+ 64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
+ 90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
+110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
+128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
+143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
+156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
+169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
+181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
+192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
+202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
+212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
+221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
+230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
+239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
+247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
+};
+/*
+ * Integer square root of x, capped at SS_BLOCKSIZE: every x that is at least
+ * SS_BLOCKSIZE * SS_BLOCKSIZE yields SS_BLOCKSIZE.
+ *
+ * e is the position of the highest set bit of x, found with lg_table.  It
+ * selects how the first estimate is read from sqq_table and how many
+ * averaging steps y = (y + 1 + x / y) >> 1 follow.  Values below 256 are
+ * answered straight from the table.  In the other cases the final comparison
+ * against y * y lowers an estimate that is one too large.
+ *
+ * NOTE(review): x is assumed to be non-negative; confirm against the callers.
+ */
+static inline
+int
+ss_isqrt(int x) {
+  int y, e;
+
+  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
+  e = (x & 0xffff0000) ?
+        ((x & 0xff000000) ?
+          24 + lg_table[(x >> 24) & 0xff] :
+          16 + lg_table[(x >> 16) & 0xff]) :
+        ((x & 0x0000ff00) ?
+           8 + lg_table[(x >>  8) & 0xff] :
+           0 + lg_table[(x >>  0) & 0xff]);
+
+  if(e >= 16) {
+    y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
+    if(e >= 24) { y = (y + 1 + x / y) >> 1; }
+    y = (y + 1 + x / y) >> 1;
+  } else if(e >= 8) {
+    y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
+  } else {
+    return sqq_table[x] >> 4;
+  }
+
+  return (x < (y * y)) ? y - 1 : y;
+}
+
+#endif /* SS_BLOCKSIZE != 0 */
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Compares two suffixes.
+ *
+ * p1 and p2 point into an array of positions in the text T.  The first
+ * operand is the byte range of T starting at *p1 + depth and ending just
+ * before *(p1 + 1) + 2; the second operand is formed the same way from p2.
+ * In other words, the array entry following each pointer bounds the
+ * comparison.
+ *
+ * Returns a negative value, zero or a positive value when the first operand
+ * orders before, equal to or after the second one.  If both operands still
+ * have bytes left, the result is the difference of the first mismatching
+ * bytes; an operand that runs out first orders before the other.
+ */
+static inline
+int
+ss_compare(const unsigned char *T,
+           const int *p1, const int *p2,
+           int depth) {
+  const unsigned char *U1, *U2, *U1n, *U2n;
+
+  for(U1 = T + depth + *p1,
+      U2 = T + depth + *p2,
+      U1n = T + *(p1 + 1) + 2,
+      U2n = T + *(p2 + 1) + 2;
+      (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
+      ++U1, ++U2) {
+  }
+
+  return U1 < U1n ?
+        (U2 < U2n ? *U1 - *U2 : 1) :
+        (U2 < U2n ? -1 : 0);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
+
+/* Insertionsort for small size groups
+ *
+ * Sorts the entries of [first, last), which are indices into PA, with
+ * ss_compare() at the given depth.  Entries are inserted one by one, from
+ * right to left, into the already sorted tail of the range.
+ *
+ * An entry that compares equal to the entry placed directly before it is
+ * stored bit-complemented (and is therefore negative).  While shifting, such
+ * negative entries are moved along with the entry they follow and are never
+ * passed to ss_compare() themselves.
+ */
+static
+void
+ss_insertionsort(const unsigned char *T, const int *PA,
+                 int *first, int *last, int depth) {
+  int *i, *j;
+  int t;
+  int r;
+
+  for(i = last - 2; first <= i; --i) {
+    for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
+      do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
+      if(last <= j) { break; }
+    }
+    if(r == 0) { *j = ~*j; }
+    *(j - 1) = t;
+  }
+}
+
+#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
+
+
+/*---------------------------------------------------------------------------*/
+
+#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
+/*
+ * Sift-down step of a binary max-heap stored in SA[0 .. size).  The key of an
+ * entry SA[x] is the byte Td[PA[SA[x]]].  The entry initially at index i is
+ * moved down, each time promoting the child with the larger key, until no
+ * child has a key larger than its own.
+ *
+ * Note that the right child SA[2 * i + 2] is read whenever the left child
+ * index is below size; there is no separate bounds check for it.
+ */
+static inline
+void
+ss_fixdown(const unsigned char *Td, const int *PA,
+           int *SA, int i, int size) {
+  int j, k;
+  int v;
+  int c, d, e;
+
+  for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
+    d = Td[PA[SA[k = j++]]];
+    if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
+    if(d <= c) { break; }
+  }
+  SA[i] = v;
+}
+
+/* Simple top-down heapsort.
+ *
+ * Sorts SA[0 .. size) in ascending order of the key byte Td[PA[SA[x]]],
+ * using ss_fixdown() for the heap operations.
+ *
+ * The heap is built over m elements, where m is size rounded down to an odd
+ * number.  When size is even, the last element is set aside first (after
+ * swapping it with SA[m / 2] if that leaves the larger key at SA[m / 2]) and
+ * is exchanged with the heap maximum once the heap has been built.
+ */
+static
+void
+ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
+  int i, m;
+  int t;
+
+  m = size;
+  if((size % 2) == 0) {
+    m--;
+    if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
+  }
+
+  for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
+  if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
+  for(i = m - 1; 0 < i; --i) {
+    t = SA[0], SA[0] = SA[i];
+    ss_fixdown(Td, PA, SA, 0, i);
+    SA[i] = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Returns the median of three elements.
+ *
+ * v1, v2 and v3 point to entries whose key is the byte Td[PA[*v]].  The
+ * result is whichever of the three pointers refers to the median key.  SWAP
+ * is applied to the pointer parameters themselves, not to what they point
+ * to.
+ */
+static inline
+int *
+ss_median3(const unsigned char *Td, const int *PA,
+           int *v1, int *v2, int *v3) {
+  if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
+  if(Td[PA[*v2]] > Td[PA[*v3]]) {
+    if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
+    else { return v3; }
+  }
+  return v2;
+}
+
+/* Returns the median of five elements.
+ *
+ * v1 .. v5 point to entries whose key is the byte Td[PA[*v]].  The pointer
+ * parameters are reordered with a fixed sequence of compare-and-swap steps
+ * and the pointer that ends up referring to the median key is returned.
+ * SWAP is applied to the pointer parameters themselves, not to what they
+ * point to.
+ */
+static inline
+int *
+ss_median5(const unsigned char *Td, const int *PA,
+           int *v1, int *v2, int *v3, int *v4, int *v5) {
+  if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
+  if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
+  if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
+  if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
+  if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
+  if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
+  return v3;
+}
+
+/* Returns the pivot element.
+ *
+ * The sampling strategy depends on the element count t = last - first:
+ *   - t <= 32:   median of the first, middle and last entry;
+ *   - t <= 512:  median of five entries: the first, the middle, the last, and
+ *     the entries t / 4 away from the first and from the last;
+ *   - otherwise: median of three medians-of-three, taken near the start,
+ *     around the middle and near the end of the range (spacing t / 8).
+ *
+ * Keys are the bytes Td[PA[*p]].  The result points into [first, last).
+ */
+static inline
+int *
+ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
+  int *middle;
+  int t;
+
+  t = last - first;
+  middle = first + t / 2;
+
+  if(t <= 512) {
+    if(t <= 32) {
+      return ss_median3(Td, PA, first, middle, last - 1);
+    } else {
+      t >>= 2;
+      return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
+    }
+  }
+  t >>= 3;
+  first  = ss_median3(Td, PA, first, first + t, first + (t << 1));
+  middle = ss_median3(Td, PA, middle - t, middle, middle + t);
+  last   = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
+  return ss_median3(Td, PA, first, middle, last);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Binary partition for substrings.
+ *
+ * Rearranges [first, last) so that the entries x satisfying
+ * PA[x] + depth >= PA[x + 1] + 1 come first, and returns a pointer to the
+ * first entry of the remaining part.
+ *
+ * Entries placed in the front part are stored bit-complemented.  If the front
+ * part is non-empty, *first is complemented once more at the end, which
+ * restores its original value.
+ */
+static inline
+int *
+ss_partition(const int *PA,
+                    int *first, int *last, int depth) {
+  int *a, *b;
+  int t;
+  for(a = first - 1, b = last;;) {
+    for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
+    for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }
+    if(b <= a) { break; }
+    t = ~*b;
+    *b = *a;
+    *a = t;
+  }
+  if(first < a) { *first = ~*first; }
+  return a;
+}
+
+/* Multikey introsort for medium size groups.
+ *
+ * Sorts the entries of [first, last), which are indices into PA, by the
+ * substrings of T they denote, starting the comparison at offset depth.
+ * Recursion is replaced by an explicit stack driven through STACK_PUSH and
+ * STACK_POP; the function returns from inside STACK_POP once that stack is
+ * empty.
+ *
+ * Each iteration of the main loop treats the current range as follows:
+ *   - a range of at most SS_INSERTIONSORT_THRESHOLD entries is finished with
+ *     ss_insertionsort() and the next range is popped;
+ *   - when the budget `limit` (initialised from ss_ilg() of the range size)
+ *     reaches zero, the range is heap-sorted on the byte at the current
+ *     depth; a negative limit marks a range in that state, which is then
+ *     split into runs of equal bytes;
+ *   - otherwise a pivot byte is chosen with ss_pivot() and the range is split
+ *     into entries below, equal to and above that byte.  The outer parts are
+ *     continued at the same depth and the equal part at depth + 1.
+ *
+ * ss_partition() is called where the byte preceding the current depth
+ * compares below the current byte (or pivot byte).
+ * NOTE(review): the purpose of that test is not evident from this function
+ * alone; confirm against the libdivsufsort documentation.
+ */
+static
+void
+ss_mintrosort(const unsigned char *T, const int *PA,
+              int *first, int *last,
+              int depth) {
+#define STACK_SIZE SS_MISORT_STACKSIZE
+  struct { int *a, *b, c; int d; } stack[STACK_SIZE];
+  const unsigned char *Td;
+  int *a, *b, *c, *d, *e, *f;
+  int s, t;
+  int ssize;
+  int limit;
+  int v, x = 0;
+
+  for(ssize = 0, limit = ss_ilg(last - first);;) {
+
+    if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
+#if 1 < SS_INSERTIONSORT_THRESHOLD
+      if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
+#endif
+      STACK_POP(first, last, depth, limit);
+      continue;
+    }
+
+    Td = T + depth;
+    if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
+    if(limit < 0) {
+      for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
+        if((x = Td[PA[*a]]) != v) {
+          if(1 < (a - first)) { break; }
+          v = x;
+          first = a;
+        }
+      }
+      if(Td[PA[*first] - 1] < v) {
+        first = ss_partition(PA, first, a, depth);
+      }
+      if((a - first) <= (last - a)) {
+        if(1 < (a - first)) {
+          STACK_PUSH(a, last, depth, -1);
+          last = a, depth += 1, limit = ss_ilg(a - first);
+        } else {
+          first = a, limit = -1;
+        }
+      } else {
+        if(1 < (last - a)) {
+          STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
+          first = a, limit = -1;
+        } else {
+          last = a, depth += 1, limit = ss_ilg(a - first);
+        }
+      }
+      continue;
+    }
+
+    /* choose pivot */
+    a = ss_pivot(Td, PA, first, last);
+    v = Td[PA[*a]];
+    SWAP(*first, *a);
+
+    /* partition */
+    for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
+    if(((a = b) < last) && (x < v)) {
+      for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
+        if(x == v) { SWAP(*b, *a); ++a; }
+      }
+    }
+    for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
+    if((b < (d = c)) && (x > v)) {
+      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+        if(x == v) { SWAP(*c, *d); --d; }
+      }
+    }
+    for(; b < c;) {
+      SWAP(*b, *c);
+      for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
+        if(x == v) { SWAP(*b, *a); ++a; }
+      }
+      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+        if(x == v) { SWAP(*c, *d); --d; }
+      }
+    }
+
+    if(a <= d) {
+      c = b - 1;
+
+      if((s = a - first) > (t = b - a)) { s = t; }
+      for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+      if((s = d - c) > (t = last - d - 1)) { s = t; }
+      for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+
+      a = first + (b - a), c = last - (d - c);
+      b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
+
+      if((a - first) <= (last - c)) {
+        if((last - c) <= (c - b)) {
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          STACK_PUSH(c, last, depth, limit);
+          last = a;
+        } else if((a - first) <= (c - b)) {
+          STACK_PUSH(c, last, depth, limit);
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          last = a;
+        } else {
+          STACK_PUSH(c, last, depth, limit);
+          STACK_PUSH(first, a, depth, limit);
+          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
+        }
+      } else {
+        if((a - first) <= (c - b)) {
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          STACK_PUSH(first, a, depth, limit);
+          first = c;
+        } else if((last - c) <= (c - b)) {
+          STACK_PUSH(first, a, depth, limit);
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          first = c;
+        } else {
+          STACK_PUSH(first, a, depth, limit);
+          STACK_PUSH(c, last, depth, limit);
+          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
+        }
+      }
+    } else {
+      limit += 1;
+      if(Td[PA[*first] - 1] < v) {
+        first = ss_partition(PA, first, last, depth);
+        limit = ss_ilg(last - first);
+      }
+      depth += 1;
+    }
+  }
+#undef STACK_SIZE
+}
+
+#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
+
+
+/*---------------------------------------------------------------------------*/
+
+#if SS_BLOCKSIZE != 0
+/* Exchanges the n ints starting at a with the n ints starting at b, one
+ * element at a time.  Does nothing when n <= 0. */
+static inline
+void
+ss_blockswap(int *a, int *b, int n) {
+  int t;
+  for(; 0 < n; --n, ++a, ++b) {
+    t = *a, *a = *b, *b = t;
+  }
+}
+/*
+ * Rotates [first, last) in place around middle, so that the l = middle - first
+ * leading entries and the r = last - middle trailing entries trade places.
+ * NOTE(review): for l != r this description is inferred from the l == r case
+ * and the function name; confirm before relying on it.
+ *
+ * Equal lengths are handled by a single ss_blockswap().  Otherwise entries
+ * are cycled through the temporary t, working backwards from the end of the
+ * range when the left part is the shorter one and forwards from the start
+ * when it is not; the outer loop then repeats on what is left.  Nothing is
+ * done when either part is empty.
+ */
+static inline
+void
+ss_rotate(int *first, int *middle, int *last) {
+  int *a, *b, t;
+  int l, r;
+  l = middle - first, r = last - middle;
+  for(; (0 < l) && (0 < r);) {
+    if(l == r) { ss_blockswap(first, middle, l); break; }
+    if(l < r) {
+      a = last - 1, b = middle - 1;
+      t = *a;
+      do {
+        *a-- = *b, *b-- = *a;
+        if(b < first) {
+          *a = t;
+          last = a;
+          if((r -= l + 1) <= l) { break; }
+          a -= 1, b = middle - 1;
+          t = *a;
+        }
+      } while(1);
+    } else {
+      a = first, b = middle;
+      t = *a;
+      do {
+        *a++ = *b, *b++ = *a;
+        if(last <= b) {
+          *a = t;
+          first = a + 1;
+          if((l -= r + 1) <= r) { break; }
+          a += 1, b = middle;
+          t = *a;
+        }
+      } while(1);
+    }
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+/*
+ * Merges the ranges [first, middle) and [middle, last) without an auxiliary
+ * buffer.  Entries are indices into PA and are compared with ss_compare() at
+ * the given depth; an entry stored bit-complemented (negative) is decoded
+ * before it is used as an index.
+ *
+ * Each pass takes the last entry of the right range, binary-searches the left
+ * range for its position, and calls ss_rotate() to move the tail of the left
+ * range behind it.  If the search met an entry comparing equal (r == 0), the
+ * entry at the split point is bit-complemented before the rotation.  The loop
+ * ends when either range has been used up.
+ *
+ * NOTE(review): both ranges are presumably already sorted on entry; confirm
+ * against the caller.
+ */
+static
+void
+ss_inplacemerge(const unsigned char *T, const int *PA,
+                int *first, int *middle, int *last,
+                int depth) {
+  const int *p;
+  int *a, *b;
+  int len, half;
+  int q, r;
+  int x;
+
+  for(;;) {
+    if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
+    else                { x = 0; p = PA +  *(last - 1); }
+    for(a = first, len = middle - first, half = len >> 1, r = -1;
+        0 < len;
+        len = half, half >>= 1) {
+      b = a + half;
+      q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
+      if(q < 0) {
+        a = b + 1;
+        half -= (len & 1) ^ 1;
+      } else {
+        r = q;
+      }
+    }
+    if(a < middle) {
+      if(r == 0) { *a = ~*a; }
+      ss_rotate(a, middle, last);
+      last -= middle - a;
+      middle = a;
+      if(first == middle) { break; }
+    }
+    --last;
+    if(x != 0) { while(*--last < 0) { } }
+    if(middle == last) { break; }
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Merge-forward with internal buffer.
+ *
+ * Swaps the left run [first, middle) into buf with ss_blockswap() (so the
+ * previous contents of buf are parked in the array, not lost), then merges
+ * buf and the right run [middle, last) front to back into [first, last).
+ * Every store into the output is paired with a store of the displaced value
+ * back into the source slot; t holds the first displaced value and is
+ * written back on exit.
+ *
+ * buf must have room for (middle - first) entries; ss_swapmerge() only
+ * calls this when (middle - first) <= bufsize.  When two compared entries
+ * tie (r == 0), the entry from the right run is complemented. */
+static
+void
+ss_mergeforward(const unsigned char *T, const int *PA,
+                int *first, int *middle, int *last,
+                int *buf, int depth) {
+  int *a, *b, *c, *bufend;
+  int t;
+  int r;
+
+  bufend = buf + (middle - first) - 1;
+  ss_blockswap(buf, first, middle - first);
+
+  for(t = *(a = first), b = buf, c = middle;;) {
+    r = ss_compare(T, PA + *b, PA + *c, depth);
+    if(r < 0) {
+      do {
+        *a++ = *b;
+        if(bufend <= b) { *bufend = t; return; } /* buffer exhausted: done */
+        *b++ = *a;
+      } while(*b < 0);
+    } else if(r > 0) {
+      do {
+        *a++ = *c, *c++ = *a;
+        if(last <= c) {
+          while(b < bufend) { *a++ = *b, *b++ = *a; } /* right run exhausted: flush the buffer */
+          *a = *b, *b = t;
+          return;
+        }
+      } while(*c < 0);
+    } else {
+      *c = ~*c;
+      do {
+        *a++ = *b;
+        if(bufend <= b) { *bufend = t; return; }
+        *b++ = *a;
+      } while(*b < 0);
+
+      do {
+        *a++ = *c, *c++ = *a;
+        if(last <= c) {
+          while(b < bufend) { *a++ = *b, *b++ = *a; }
+          *a = *b, *b = t;
+          return;
+        }
+      } while(*c < 0);
+    }
+  }
+}
+
+/* Merge-backward with internal buffer.
+ *
+ * Mirror image of ss_mergeforward(): swaps the right run [middle, last)
+ * into buf with ss_blockswap(), then merges buf and the left run
+ * [first, middle) back to front into [first, last).
+ *
+ * buf must have room for (last - middle) entries; ss_swapmerge() only calls
+ * this when (last - middle) <= bufsize.  p1/p2 point at the PA entries of
+ * the current buffer / left-run element.  Bit 0 of x is set while the
+ * current buffer element is stored complemented, bit 1 likewise for the
+ * current left-run element. */
+static
+void
+ss_mergebackward(const unsigned char *T, const int *PA,
+                 int *first, int *middle, int *last,
+                 int *buf, int depth) {
+  const int *p1, *p2;
+  int *a, *b, *c, *bufend;
+  int t;
+  int r;
+  int x;
+
+  bufend = buf + (last - middle) - 1;
+  ss_blockswap(buf, middle, last - middle);
+
+  x = 0;
+  if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
+  else                  { p1 = PA +  *bufend; }
+  if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
+  else                  { p2 = PA +  *(middle - 1); }
+  for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
+    r = ss_compare(T, p1, p2, depth);
+    if(0 < r) {
+      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
+      *a-- = *b;
+      if(b <= buf) { *buf = t; break; } /* buffer exhausted: done */
+      *b-- = *a;
+      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
+      else       { p1 = PA +  *b; }
+    } else if(r < 0) {
+      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
+      *a-- = *c, *c-- = *a;
+      if(c < first) {
+        while(buf < b) { *a-- = *b, *b-- = *a; } /* left run exhausted: flush the buffer */
+        *a = *b, *b = t;
+        break;
+      }
+      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
+      else       { p2 = PA +  *c; }
+    } else {
+      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
+      *a-- = ~*b; /* tie: the buffer element is written out complemented */
+      if(b <= buf) { *buf = t; break; }
+      *b-- = *a;
+      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
+      *a-- = *c, *c-- = *a;
+      if(c < first) {
+        while(buf < b) { *a-- = *b, *b-- = *a; }
+        *a = *b, *b = t;
+        break;
+      }
+      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
+      else       { p1 = PA +  *b; }
+      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
+      else       { p2 = PA +  *c; }
+    }
+  }
+}
+
+/* D&C based merge.
+ *
+ * Merges the adjacent runs [first, middle) and [middle, last), using buf
+ * (bufsize entries) as scratch space.
+ *
+ * If the right or the left run fits into buf it is merged directly with
+ * ss_mergebackward() or ss_mergeforward().  Otherwise a binary search finds
+ * m, the number of entries to exchange across middle; the two blocks of m
+ * entries are swapped and the two resulting smaller merges are processed
+ * iteratively: one is pushed on an explicit stack (STACK_PUSH / STACK_POP
+ * are defined elsewhere in this file) and the other continues in the loop.
+ * The function returns through STACK_POP when the stack is empty.
+ *
+ * check and next are bit sets (1, 2, 4) consumed by MERGE_CHECK, which
+ * complements the entry at a sub-range boundary when the flag is set and,
+ * for flags 2 and 4, ss_compare() reports a tie with its predecessor.
+ *
+ * NOTE(review): GETIDX and MERGE_CHECK are not #undef'd at the end of the
+ * function, unlike STACK_SIZE. */
+static
+void
+ss_swapmerge(const unsigned char *T, const int *PA,
+             int *first, int *middle, int *last,
+             int *buf, int bufsize, int depth) {
+#define STACK_SIZE SS_SMERGE_STACKSIZE
+#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
+#define MERGE_CHECK(a, b, c)\
+  do {\
+    if(((c) & 1) ||\
+       (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
+      *(a) = ~*(a);\
+    }\
+    if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
+      *(b) = ~*(b);\
+    }\
+  } while(0)
+  struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
+  int *l, *r, *lm, *rm;
+  int m, len, half;
+  int ssize;
+  int check, next;
+
+  for(check = 0, ssize = 0;;) {
+    if((last - middle) <= bufsize) {
+      if((first < middle) && (middle < last)) {
+        ss_mergebackward(T, PA, first, middle, last, buf, depth);
+      }
+      MERGE_CHECK(first, last, check);
+      STACK_POP(first, middle, last, check);
+      continue;
+    }
+
+    if((middle - first) <= bufsize) {
+      if(first < middle) {
+        ss_mergeforward(T, PA, first, middle, last, buf, depth);
+      }
+      MERGE_CHECK(first, last, check);
+      STACK_POP(first, middle, last, check);
+      continue;
+    }
+
+    for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
+        0 < len;
+        len = half, half >>= 1) {
+      if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
+                       PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
+        m += half + 1;
+        half -= (len & 1) ^ 1;
+      }
+    }
+
+    if(0 < m) {
+      lm = middle - m, rm = middle + m;
+      ss_blockswap(lm, middle, m);
+      l = r = middle, next = 0;
+      if(rm < last) {
+        if(*rm < 0) {
+          *rm = ~*rm;
+          if(first < lm) { for(; *--l < 0;) { } next |= 4; }
+          next |= 1;
+        } else if(first < lm) {
+          for(; *r < 0; ++r) { }
+          next |= 2;
+        }
+      }
+
+      if((l - first) <= (last - r)) { /* continue with the smaller part, push the larger one */
+        STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
+        middle = lm, last = l, check = (check & 3) | (next & 4);
+      } else {
+        if((next & 2) && (r == middle)) { next ^= 6; }
+        STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
+        first = r, middle = rm, check = (next & 3) | (check & 4);
+      }
+    } else {
+      if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
+        *middle = ~*middle;
+      }
+      MERGE_CHECK(first, last, check);
+      STACK_POP(first, middle, last, check);
+    }
+  }
+#undef STACK_SIZE
+}
+
+#endif /* SS_BLOCKSIZE != 0 */
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Substring sort
+ *
+ * Sorts the entries of [first, last); each entry is an index into PA and
+ * comparisons are done by the ss_* helpers at the given depth.
+ *
+ * With SS_BLOCKSIZE == 0 the whole range is handed to ss_mintrosort().
+ * Otherwise blocks of SS_BLOCKSIZE entries are sorted one by one and merged
+ * with ss_swapmerge().  buf/bufsize is the scratch space for merging; when
+ * it is too small, the last `limit` entries of the range itself are used
+ * as the buffer, then sorted separately and merged in with
+ * ss_inplacemerge().
+ *
+ * If lastsuffix is nonzero, the entry at *first is left out of the sort and
+ * inserted afterwards by a linear scan; n is only used there, to build the
+ * temporary two-entry PAi[] passed to ss_compare(). */
+static
+void
+sssort(const unsigned char *T, const int *PA,
+       int *first, int *last,
+       int *buf, int bufsize,
+       int depth, int n, int lastsuffix) {
+  int *a;
+#if SS_BLOCKSIZE != 0
+  int *b, *middle, *curbuf;
+  int j, k, curbufsize, limit;
+#endif
+  int i;
+
+  if(lastsuffix != 0) { ++first; }
+
+#if SS_BLOCKSIZE == 0
+  ss_mintrosort(T, PA, first, last, depth);
+#else
+  if((bufsize < SS_BLOCKSIZE) &&
+      (bufsize < (last - first)) &&
+      (bufsize < (limit = ss_isqrt(last - first)))) {
+    if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
+    buf = middle = last - limit, bufsize = limit; /* borrow the tail of the range as merge buffer */
+  } else {
+    middle = last, limit = 0;
+  }
+  for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+    ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
+#elif 1 < SS_BLOCKSIZE
+    ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
+#endif
+    curbufsize = last - (a + SS_BLOCKSIZE);
+    curbuf = a + SS_BLOCKSIZE;
+    if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
+    for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { /* one merge per trailing 1 bit of the block index */
+      ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
+    }
+  }
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+  ss_mintrosort(T, PA, a, middle, depth);
+#elif 1 < SS_BLOCKSIZE
+  ss_insertionsort(T, PA, a, middle, depth);
+#endif
+  for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
+    if(i & 1) {
+      ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
+      a -= k;
+    }
+  }
+  if(limit != 0) {
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+    ss_mintrosort(T, PA, middle, last, depth);
+#elif 1 < SS_BLOCKSIZE
+    ss_insertionsort(T, PA, middle, last, depth);
+#endif
+    ss_inplacemerge(T, PA, first, middle, last, depth);
+  }
+#endif
+
+  if(lastsuffix != 0) {
+    /* Insert last type B* suffix. */
+    int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
+    for(a = first, i = *(first - 1);
+        (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
+        ++a) {
+      *(a - 1) = *a;
+    }
+    *(a - 1) = i;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Table-driven integer logarithm helper.
+ *
+ * Finds the most significant non-zero byte among the low 32 bits of n and
+ * returns 8 * (its byte index) + lg_table[that byte]; if the upper three
+ * bytes are all zero the result is lg_table[n & 0xff].  lg_table is defined
+ * elsewhere in this file (presumably floor(log2) of a byte -- confirm
+ * there). */
+static inline
+int
+tr_ilg(int n) {
+  if(n & 0xffff0000) {
+    if(n & 0xff000000) {
+      return 24 + lg_table[(n >> 24) & 0xff];
+    }
+    return 16 + lg_table[(n >> 16) & 0xff];
+  }
+  if(n & 0x0000ff00) {
+    return 8 + lg_table[(n >>  8) & 0xff];
+  }
+  return 0 + lg_table[(n >>  0) & 0xff];
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Simple insertionsort for small size groups.
+ *
+ * Sorts the entries of [first, last) by their key ISAd[entry].  When the
+ * entry being inserted has the same key as the entry it is placed after,
+ * that preceding entry is left bit-complemented (negative); complemented
+ * entries are shifted along with the non-negative entry that precedes them
+ * and are never used as an index into ISAd here. */
+static
+void
+tr_insertionsort(const int *ISAd, int *first, int *last) {
+  int *a, *b;
+  int t, r;
+
+  for(a = first + 1; a < last; ++a) {
+    for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
+      do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
+      if(b < first) { break; } /* ran off the front: t is the smallest so far */
+    }
+    if(r == 0) { *b = ~*b; } /* equal keys: mark the tie */
+    *(b + 1) = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+/* Sift-down step of a max-heap stored in SA[0..size) and keyed by
+ * ISAd[SA[.]]: moves the value at index i down until neither child has a
+ * larger key.
+ *
+ * NOTE(review): the right child SA[j] (j == 2*i + 2) is read without a
+ * j < size check; the callers in tr_heapsort() appear to arrange that this
+ * slot is still a valid array element -- confirm before reusing elsewhere. */
+static inline
+void
+tr_fixdown(const int *ISAd, int *SA, int i, int size) {
+  int j, k;
+  int v;
+  int c, d, e;
+
+  for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
+    d = ISAd[SA[k = j++]];
+    if(d < (e = ISAd[SA[j]])) { k = j; d = e; } /* pick the child with the larger key */
+    if(d <= c) { break; }
+  }
+  SA[i] = v;
+}
+
+/* Simple top-down heapsort.
+ *
+ * Sorts SA[0..size) into ascending order of the key ISAd[SA[.]].  The heap
+ * is built over an odd number m of entries: for an even size the last entry
+ * is first compared with (and possibly swapped with) its parent, kept out
+ * of the heap construction, and then exchanged with the heap maximum. */
+static
+void
+tr_heapsort(const int *ISAd, int *SA, int size) {
+  int i, m;
+  int t;
+
+  m = size;
+  if((size % 2) == 0) {
+    m--;
+    if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
+  }
+
+  for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } /* heapify */
+  if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
+  for(i = m - 1; 0 < i; --i) { /* repeatedly move the maximum to the end */
+    t = SA[0], SA[0] = SA[i];
+    tr_fixdown(ISAd, SA, 0, i);
+    SA[i] = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Returns the median of three elements.
+ *
+ * v1, v2 and v3 point at entries whose sort key is ISAd[*v]; the pointer
+ * whose key is the middle one of the three is returned. */
+static inline
+int *
+tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
+  int *lo = v1, *hi = v2;
+
+  /* Order the first two candidates by key. */
+  if(ISAd[*v1] > ISAd[*v2]) { lo = v2; hi = v1; }
+
+  /* The larger of the two is the median unless v3 lies below it. */
+  if(ISAd[*hi] <= ISAd[*v3]) { return hi; }
+  return (ISAd[*lo] > ISAd[*v3]) ? lo : v3;
+}
+
+/* Returns the median of five elements.
+ *
+ * v1..v5 point at entries whose sort key is ISAd[*v].  Only the local
+ * pointer variables are exchanged by the SWAP()s below; the entries they
+ * point to are not modified. */
+static inline
+int *
+tr_median5(const int *ISAd,
+           int *v1, int *v2, int *v3, int *v4, int *v5) {
+  if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
+  if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
+  if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
+  if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
+  if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
+  if(ISAd[*v3] > ISAd[*v4]) { return v4; }
+  return v3;
+}
+
+/* Returns the pivot element.
+ *
+ * The sampling strategy depends on the number of entries in [first, last):
+ *   up to 32   median of first, middle and last entry;
+ *   up to 512  median of five evenly spread entries;
+ *   larger     median of three medians-of-three (nine samples). */
+static inline
+int *
+tr_pivot(const int *ISAd, int *first, int *last) {
+  int span = last - first;
+  int *mid = first + span / 2;
+  int *lo, *hi;
+  int step;
+
+  if(span <= 32) {
+    return tr_median3(ISAd, first, mid, last - 1);
+  }
+  if(span <= 512) {
+    step = span >> 2;
+    return tr_median5(ISAd, first, first + step, mid, last - 1 - step, last - 1);
+  }
+
+  step = span >> 3;
+  lo  = tr_median3(ISAd, first, first + step, first + (step << 1));
+  mid = tr_median3(ISAd, mid - step, mid, mid + step);
+  hi  = tr_median3(ISAd, last - 1 - (step << 1), last - 1 - step, last - 1);
+  return tr_median3(ISAd, lo, mid, hi);
+}
+
+
+/*---------------------------------------------------------------------------*/
+/* Work budget shared by the tr_introsort() calls made from one trsort() run. */
+typedef struct _trbudget_t trbudget_t;
+struct _trbudget_t {
+  int chance; /* refills still available; decremented by trbudget_check() */
+  int remain; /* budget left before the next refill is needed */
+  int incval; /* amount added to remain on each refill */
+  int count; /* total size of the requests trbudget_check() has refused; reset by trsort() */
+};
+
+/* Set up a budget with `chance` refills of `incval` units each; the budget
+ * starts out with one full allotment of incval.  budget->count is not
+ * touched here. */
+static inline
+void
+trbudget_init(trbudget_t *budget, int chance, int incval) {
+  budget->incval = incval;
+  budget->remain = incval;
+  budget->chance = chance;
+}
+
+/* Try to charge `size` units to the budget.
+ *
+ * Returns 1 if the request is granted: either it fits into what remains, or
+ * a refill was still available and has been consumed.  Returns 0 when no
+ * refill is left; the refused size is then added to budget->count. */
+static inline
+int
+trbudget_check(trbudget_t *budget, int size) {
+  if(size > budget->remain) {
+    if(budget->chance == 0) {
+      budget->count += size;
+      return 0;
+    }
+    /* Spend one refill and charge the request against it. */
+    budget->chance -= 1;
+    budget->remain += budget->incval - size;
+    return 1;
+  }
+  budget->remain -= size;
+  return 1;
+}
+
+
+/*---------------------------------------------------------------------------*/
+/* Three-way partition of [first, last) around the key value v, where the
+ * key of an entry e is ISAd[e].
+ *
+ * Scanning starts at middle; entries in [first, middle) are treated as
+ * already having key v.  Entries with key v met during the scan are parked
+ * at both ends and swapped into the centre afterwards.
+ *
+ * On return: keys in [first, *pa) are < v, keys in [*pa, *pb) are == v and
+ * keys in [*pb, last) are > v.  If every key equals v, *pa == first and
+ * *pb == last. */
+static inline
+void
+tr_partition(const int *ISAd,
+             int *first, int *middle, int *last,
+             int **pa, int **pb, int v) {
+  int *a, *b, *c, *d, *e, *f;
+  int t, s;
+  int x = 0;
+
+  for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
+  if(((a = b) < last) && (x < v)) {
+    for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
+      if(x == v) { SWAP(*b, *a); ++a; }
+    }
+  }
+  for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
+  if((b < (d = c)) && (x > v)) {
+    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+      if(x == v) { SWAP(*c, *d); --d; }
+    }
+  }
+  for(; b < c;) {
+    SWAP(*b, *c);
+    for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
+      if(x == v) { SWAP(*b, *a); ++a; }
+    }
+    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+      if(x == v) { SWAP(*c, *d); --d; }
+    }
+  }
+
+  if(a <= d) {
+    c = b - 1;
+    if((s = a - first) > (t = b - a)) { s = t; }
+    for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } /* move the left block of equal keys to the centre */
+    if((s = d - c) > (t = last - d - 1)) { s = t; }
+    for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } /* move the right block of equal keys to the centre */
+    first += (b - a), last -= (d - c);
+  }
+  *pa = first, *pb = last;
+}
+/* Fill the middle partition [a, b) of the group [first, last) from the
+ * already ordered entries around it.
+ *
+ * v = b - SA - 1 is the rank value that identifies members of the middle
+ * partition.  The first loop scans forward from first: for each entry *c,
+ * s = *c - depth is appended after d (which starts at a - 1) if s >= 0 and
+ * ISA[s] == v.  The second loop scans backward from last - 1 and fills from
+ * b downwards in the same way.  ISA[s] is set to the final position of
+ * every entry placed. */
+static
+void
+tr_copy(int *ISA, const int *SA,
+        int *first, int *a, int *b, int *last,
+        int depth) {
+  /* sort suffixes of middle partition
+     by using sorted order of suffixes of left and right partition. */
+  int *c, *d, *e;
+  int s, v;
+
+  v = b - SA - 1;
+  for(c = first, d = a - 1; c <= d; ++c) {
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *++d = s;
+      ISA[s] = d - SA;
+    }
+  }
+  for(c = last - 1, e = d + 1, d = b; e < d; --c) {
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *--d = s;
+      ISA[s] = d - SA;
+    }
+  }
+}
+/* Variant of tr_copy() that assigns shared ranks instead of final
+ * positions.
+ *
+ * Entries are placed into the middle partition exactly as in tr_copy(), but
+ * ISA[s] receives newrank, which only changes when ISA[s + depth] differs
+ * from that of the previously placed entry (tracked in lastrank).  Between
+ * the forward and the backward fill, the ranks of the entries in
+ * [first, d] are renumbered in a backward pass.  tr_introsort() uses this
+ * variant when the stack slot's d field is nonzero. */
+static
+void
+tr_partialcopy(int *ISA, const int *SA,
+               int *first, int *a, int *b, int *last,
+               int depth) {
+  int *c, *d, *e;
+  int s, v;
+  int rank, lastrank, newrank = -1;
+
+  v = b - SA - 1;
+  lastrank = -1;
+  for(c = first, d = a - 1; c <= d; ++c) {
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *++d = s;
+      rank = ISA[s + depth];
+      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
+      ISA[s] = newrank;
+    }
+  }
+
+  lastrank = -1;
+  for(e = d; first <= e; --e) {
+    rank = ISA[*e];
+    if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
+    if(newrank != rank) { ISA[*e] = newrank; }
+  }
+
+  lastrank = -1;
+  for(c = last - 1, e = d + 1, d = b; e < d; --c) {
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *--d = s;
+      rank = ISA[s + depth];
+      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
+      ISA[s] = newrank;
+    }
+  }
+}
+/* Refine the ordering of the group [first, last) of SA using the keys
+ * ISAd[.], with an explicit stack instead of recursion (STACK_PUSH5 and
+ * STACK_POP5 are defined elsewhere in this file; the function returns
+ * through STACK_POP5 once the stack is empty).
+ *
+ * limit drives a small state machine:
+ *   limit >= 0   ordinary introsort step: insertion sort for groups of at
+ *                most TR_INSERTIONSORT_THRESHOLD entries, heapsort once
+ *                limit has counted down to 0, otherwise pivot + partition;
+ *   limit == -1  tandem repeat partition;
+ *   limit == -2  tandem repeat copy;
+ *   otherwise    (-3) the group has just been sorted: ranks in ISA are
+ *                updated and groups of equal keys are queued for the next
+ *                level (ISAd + incr).
+ *
+ * budget caps the work: when trbudget_check() refuses a group, its size is
+ * accumulated in budget->count and the group is not refined further in this
+ * call. */
+static
+void
+tr_introsort(int *ISA, const int *ISAd,
+             int *SA, int *first, int *last,
+             trbudget_t *budget) {
+#define STACK_SIZE TR_STACKSIZE
+  struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
+  int *a, *b, *c;
+  int v, x = 0;
+  int incr = ISAd - ISA;
+  int limit, next;
+  int ssize, trlink = -1;
+
+  for(ssize = 0, limit = tr_ilg(last - first);;) {
+
+    if(limit < 0) {
+      if(limit == -1) {
+        /* tandem repeat partition */
+        tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
+
+        /* update ranks */
+        if(a < last) {
+          for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+        }
+        if(b < last) {
+          for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
+        }
+
+        /* push */
+        if(1 < (b - a)) {
+          STACK_PUSH5(NULL, a, b, 0, 0);
+          STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
+          trlink = ssize - 2;
+        }
+        if((a - first) <= (last - b)) {
+          if(1 < (a - first)) {
+            STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
+            last = a, limit = tr_ilg(a - first);
+          } else if(1 < (last - b)) {
+            first = b, limit = tr_ilg(last - b);
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        } else {
+          if(1 < (last - b)) {
+            STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
+            first = b, limit = tr_ilg(last - b);
+          } else if(1 < (a - first)) {
+            last = a, limit = tr_ilg(a - first);
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        }
+      } else if(limit == -2) {
+        /* tandem repeat copy */
+        a = stack[--ssize].b, b = stack[ssize].c;
+        if(stack[ssize].d == 0) {
+          tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
+        } else {
+          if(0 <= trlink) { stack[trlink].d = -1; }
+          tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
+        }
+        STACK_POP5(ISAd, first, last, limit, trlink);
+      } else {
+        /* sorted partition */
+        if(0 <= *first) {
+          a = first;
+          do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
+          first = a;
+        }
+        if(first < last) {
+          a = first; do { *a = ~*a; } while(*++a < 0);
+          next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
+          if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
+
+          /* push */
+          if(trbudget_check(budget, a - first)) {
+            if((a - first) <= (last - a)) {
+              STACK_PUSH5(ISAd, a, last, -3, trlink);
+              ISAd += incr, last = a, limit = next;
+            } else {
+              if(1 < (last - a)) {
+                STACK_PUSH5(ISAd + incr, first, a, next, trlink);
+                first = a, limit = -3;
+              } else {
+                ISAd += incr, last = a, limit = next;
+              }
+            }
+          } else {
+            if(0 <= trlink) { stack[trlink].d = -1; }
+            if(1 < (last - a)) {
+              first = a, limit = -3;
+            } else {
+              STACK_POP5(ISAd, first, last, limit, trlink);
+            }
+          }
+        } else {
+          STACK_POP5(ISAd, first, last, limit, trlink);
+        }
+      }
+      continue;
+    }
+
+    if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
+      tr_insertionsort(ISAd, first, last);
+      limit = -3;
+      continue;
+    }
+
+    if(limit-- == 0) {
+      tr_heapsort(ISAd, first, last - first);
+      for(a = last - 1; first < a; a = b) { /* complement all but the last entry of each run of equal keys */
+        for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
+      }
+      limit = -3;
+      continue;
+    }
+
+    /* choose pivot */
+    a = tr_pivot(ISAd, first, last);
+    SWAP(*first, *a);
+    v = ISAd[*first];
+
+    /* partition */
+    tr_partition(ISAd, first, first + 1, last, &a, &b, v);
+    if((last - first) != (b - a)) {
+      next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
+
+      /* update ranks */
+      for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+      if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
+
+      /* push */
+      if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
+        if((a - first) <= (last - b)) {
+          if((last - b) <= (b - a)) {
+            if(1 < (a - first)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              STACK_PUSH5(ISAd, b, last, limit, trlink);
+              last = a;
+            } else if(1 < (last - b)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              first = b;
+            } else {
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else if((a - first) <= (b - a)) {
+            if(1 < (a - first)) {
+              STACK_PUSH5(ISAd, b, last, limit, trlink);
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              last = a;
+            } else {
+              STACK_PUSH5(ISAd, b, last, limit, trlink);
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else {
+            STACK_PUSH5(ISAd, b, last, limit, trlink);
+            STACK_PUSH5(ISAd, first, a, limit, trlink);
+            ISAd += incr, first = a, last = b, limit = next;
+          }
+        } else {
+          if((a - first) <= (b - a)) {
+            if(1 < (last - b)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              STACK_PUSH5(ISAd, first, a, limit, trlink);
+              first = b;
+            } else if(1 < (a - first)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              last = a;
+            } else {
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else if((last - b) <= (b - a)) {
+            if(1 < (last - b)) {
+              STACK_PUSH5(ISAd, first, a, limit, trlink);
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              first = b;
+            } else {
+              STACK_PUSH5(ISAd, first, a, limit, trlink);
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else {
+            STACK_PUSH5(ISAd, first, a, limit, trlink);
+            STACK_PUSH5(ISAd, b, last, limit, trlink);
+            ISAd += incr, first = a, last = b, limit = next;
+          }
+        }
+      } else {
+        if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
+        if((a - first) <= (last - b)) {
+          if(1 < (a - first)) {
+            STACK_PUSH5(ISAd, b, last, limit, trlink);
+            last = a;
+          } else if(1 < (last - b)) {
+            first = b;
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        } else {
+          if(1 < (last - b)) {
+            STACK_PUSH5(ISAd, first, a, limit, trlink);
+            first = b;
+          } else if(1 < (a - first)) {
+            last = a;
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        }
+      }
+    } else {
+      if(trbudget_check(budget, last - first)) { /* all keys equal: retry one level deeper */
+        limit = tr_ilg(last - first), ISAd += incr;
+      } else {
+        if(0 <= trlink) { stack[trlink].d = -1; }
+        STACK_POP5(ISAd, first, last, limit, trlink);
+      }
+    }
+  }
+#undef STACK_SIZE
+}
+
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Tandem repeat sort
+ *
+ * Repeatedly walks SA[0..n) and calls tr_introsort() on every group that is
+ * not yet fully sorted, until a pass leaves nothing unsorted or the first
+ * entry of SA reaches -n.  The offset (ISAd - ISA) starts at depth and
+ * doubles after every pass.
+ *
+ * A negative entry -k in SA marks a run of k entries to skip (already
+ * sorted); adjacent runs are coalesced through skip.  For a non-negative
+ * entry t the group extends up to and including index ISA[t].  A group
+ * counts as unsorted when tr_introsort() leaves budget.count nonzero. */
+static
+void
+trsort(int *ISA, int *SA, int n, int depth) {
+  int *ISAd;
+  int *first, *last;
+  trbudget_t budget;
+  int t, skip, unsorted;
+
+  trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
+/*  trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
+  for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
+    first = SA;
+    skip = 0;
+    unsorted = 0;
+    do {
+      if((t = *first) < 0) { first -= t; skip += t; }
+      else {
+        if(skip != 0) { *(first + skip) = skip; skip = 0; } /* record the length of the run just passed */
+        last = SA + ISA[t] + 1;
+        if(1 < (last - first)) {
+          budget.count = 0;
+          tr_introsort(ISA, ISAd, SA, first, last, &budget);
+          if(budget.count != 0) { unsorted += budget.count; }
+          else { skip = first - last; }
+        } else if((last - first) == 1) {
+          skip = -1;
+        }
+        first = last;
+      }
+    } while(first < (SA + n));
+    if(skip != 0) { *(first + skip) = skip; }
+    if(unsorted == 0) { break; }
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Sorts suffixes of type B*.
+ *
+ * Counts the bucket sizes of the type A, B and B* suffixes of T[0..n) into
+ * bucket_A / bucket_B (accessed through the BUCKET_A, BUCKET_B and
+ * BUCKET_BSTAR macros defined elsewhere in this file), collects the
+ * positions of the type B* suffixes at the end of SA, sorts them (sssort()
+ * for the substrings, then trsort() for the ranks) and finally moves them
+ * to their bucket positions in SA.
+ *
+ * bucket_A must hold BUCKET_A_SIZE entries and bucket_B BUCKET_B_SIZE
+ * entries; both are cleared here.  T[n - 1] is read unconditionally;
+ * divsufsort() only calls this for n >= 3.
+ *
+ * Returns m, the number of type B* suffixes found. */
+static
+int
+sort_typeBstar(const unsigned char *T, int *SA,
+               int *bucket_A, int *bucket_B,
+               int n) {
+  int *PAb, *ISAb, *buf;
+  int i, j, k, t, m, bufsize;
+  int c0, c1;
+
+  /* Initialize bucket arrays. */
+  for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
+  for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
+
+  /* Count the number of occurrences of the first one or two characters of each
+     type A, B and B* suffix. Moreover, store the beginning position of all
+     type B* suffixes into the array SA. */
+  for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
+    /* type A suffix. */
+    do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
+    if(0 <= i) {
+      /* type B* suffix. */
+      ++BUCKET_BSTAR(c0, c1);
+      SA[--m] = i;
+      /* type B suffix. */
+      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
+        ++BUCKET_B(c0, c1);
+      }
+    }
+  }
+  m = n - m;
+/*
+note:
+  A type B* suffix is lexicographically smaller than a type B suffix that
+  begins with the same first two characters.
+*/
+
+  /* Calculate the index of start/end point of each bucket. */
+  for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
+    t = i + BUCKET_A(c0);
+    BUCKET_A(c0) = i + j; /* start point */
+    i = t + BUCKET_B(c0, c0);
+    for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
+      j += BUCKET_BSTAR(c0, c1);
+      BUCKET_BSTAR(c0, c1) = j; /* end point */
+      i += BUCKET_B(c0, c1);
+    }
+  }
+
+  if(0 < m) {
+    /* Sort the type B* suffixes by their first two characters. */
+    PAb = SA + n - m; ISAb = SA + m;
+    for(i = m - 2; 0 <= i; --i) {
+      t = PAb[i], c0 = T[t], c1 = T[t + 1];
+      SA[--BUCKET_BSTAR(c0, c1)] = i;
+    }
+    t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
+    SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
+
+    /* Sort the type B* substrings using sssort. */
+    buf = SA + m, bufsize = n - (2 * m);
+    for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
+      for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
+        i = BUCKET_BSTAR(c0, c1);
+        if(1 < (j - i)) {
+          sssort(T, PAb, SA + i, SA + j,
+                 buf, bufsize, 2, n, *(SA + i) == (m - 1));
+        }
+      }
+    }
+
+    /* Compute ranks of type B* substrings. */
+    for(i = m - 1; 0 <= i; --i) {
+      if(0 <= SA[i]) {
+        j = i;
+        do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
+        SA[i + 1] = i - j;
+        if(i <= 0) { break; }
+      }
+      j = i;
+      do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
+      ISAb[SA[i]] = j;
+    }
+
+    /* Construct the inverse suffix array of type B* suffixes using trsort. */
+    trsort(ISAb, SA, m, 1);
+
+    /* Set the sorted order of type B* suffixes. */
+    for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
+      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
+      if(0 <= i) {
+        t = i;
+        for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
+        SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
+      }
+    }
+
+    /* Calculate the index of start/end point of each bucket. */
+    BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
+    for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
+      i = BUCKET_A(c0 + 1) - 1;
+      for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
+        t = i - BUCKET_B(c0, c1);
+        BUCKET_B(c0, c1) = i; /* end point */
+
+        /* Move all type B* suffixes to the correct position. */
+        for(i = t, j = BUCKET_BSTAR(c0, c1);
+            j <= k;
+            --i, --k) { SA[i] = SA[k]; }
+      }
+      BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
+      BUCKET_B(c0, c0) = i; /* end point */
+    }
+  }
+
+  return m;
+}
+
+/* Constructs the suffix array by using the sorted order of type B* suffixes.
+ *
+ * SA, bucket_A and bucket_B must be in the state left by sort_typeBstar(),
+ * and m is its return value.  The first pass (skipped when m == 0) scans
+ * each bucket from right to left to place the type B suffixes; the second
+ * pass scans SA from left to right to place the remaining ones.  During the
+ * scans an entry s may be stored as ~s as a temporary mark; marked entries
+ * are flipped back when the scan reaches them.
+ *
+ * T[n - 1] and T[n - 2] are read unconditionally, so n must be at least 2;
+ * divsufsort() only calls this for n >= 3. */
+static
+void
+construct_SA(const unsigned char *T, int *SA,
+             int *bucket_A, int *bucket_B,
+             int n, int m) {
+  int *i, *j, *k;
+  int s;
+  int c0, c1, c2;
+
+  if(0 < m) {
+    /* Construct the sorted order of type B suffixes by using
+       the sorted order of type B* suffixes. */
+    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+      /* Scan the suffix array from right to left. */
+      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
+          i <= j;
+          --j) {
+        if(0 < (s = *j)) {
+          LZ_ASSERT(T[s] == c1);
+          LZ_ASSERT(((s + 1) < n) && (T[s] <= T[s + 1]));
+          LZ_ASSERT(T[s - 1] <= T[s]);
+          *j = ~s;
+          c0 = T[--s]; /* s now names the suffix one position to the left */
+          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
+          if(c0 != c2) {
+            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } /* save the write cursor of the bucket being left */
+            k = SA + BUCKET_B(c2 = c0, c1);
+          }
+          LZ_ASSERT(k < j);
+          *k-- = s;
+        } else {
+          LZ_ASSERT(((s == 0) && (T[s] == c1)) || (s < 0));
+          *j = ~s;
+        }
+      }
+    }
+  }
+
+  /* Construct the suffix array by using
+     the sorted order of type B suffixes. */
+  k = SA + BUCKET_A(c2 = T[n - 1]);
+  *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
+  /* Scan the suffix array from left to right. */
+  for(i = SA, j = SA + n; i < j; ++i) {
+    if(0 < (s = *i)) {
+      LZ_ASSERT(T[s - 1] >= T[s]);
+      c0 = T[--s];
+      if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
+      if(c0 != c2) {
+        BUCKET_A(c2) = k - SA; /* save the write cursor of the bucket being left */
+        k = SA + BUCKET_A(c2 = c0);
+      }
+      LZ_ASSERT(i < k);
+      *k++ = s;
+    } else {
+      LZ_ASSERT(s < 0);
+      *i = ~s;
+    }
+  }
+}
+
+/*---------------------------------------------------------------------------*/
+
+/*- Function -*/
+
+/* XXX Modified from original: use provided temporary space instead of
+ * allocating it.
+ *
+ * Computes the suffix array of T[0..n) into SA[0..n).
+ *
+ * bucket_A and bucket_B are caller-provided scratch arrays; for n >= 3 they
+ * must hold at least BUCKET_A_SIZE and BUCKET_B_SIZE entries respectively
+ * (sort_typeBstar() clears that many).  They are not touched for n < 3.
+ *
+ * The sorting core works on signed int arrays and uses negative values as
+ * temporary marks, so SA and the bucket arrays are handed to it through
+ * explicit (int *) casts.  This assumes u32 is the unsigned counterpart of
+ * int (same size and representation) and that n fits in an int -- the
+ * function does not check either. */
+void
+divsufsort(const u8 *T, u32 *SA, u32 n, u32 *bucket_A, u32 *bucket_B)
+{
+  u32 m;
+
+  switch (n) {
+    case 0:
+      break;
+
+    case 1:
+      SA[0] = 0;
+      break;
+
+    case 2:
+      /* m == 1 iff suffix 0 sorts before suffix 1. */
+      m = (T[0] < T[1]);
+      SA[m ^ 1] = 0;
+      SA[m] = 1;
+      break;
+
+    default:
+      /* Explicit casts: passing u32 * where int * is expected is an
+       * incompatible pointer conversion without them. */
+      m = sort_typeBstar(T, (int *)SA, (int *)bucket_A, (int *)bucket_B, n);
+      construct_SA(T, (int *)SA, (int *)bucket_A, (int *)bucket_B, n, m);
+      break;
+  }
+}
diff --git a/src/lz_bt.c b/src/lz_binary_trees.c

similarity index 57%

rename from src/lz_bt.c

rename to src/lz_binary_trees.c

index 30250054f733f5574203f3f6bdabdddd8e50cb05..4a821f437ecd13232aea95347e8b86e9812a1020 100644 (file)
--- a/src/lz_bt.c
+++ b/src/lz_binary_trees.c
@@ -1,13 +1,32 @@
  /*
- * lz_bt.c
+ * lz_binary_trees.c
   *
   * Binary tree match-finder for Lempel-Ziv compression.
   *
- * Author:  Eric Biggers
- * Year:    2014
+ * Copyright (c) 2014 Eric Biggers.  All rights reserved.
   *
- * The author dedicates this file to the public domain.
- * You can do whatever you want with this file.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   */
  
  /*
@@ -19,16 +38,35 @@
  #  include "config.h"
  #endif
  
-#include "wimlib/lz.h"
-#include "wimlib/lz_bt.h"
+#include "wimlib/lz_mf.h"
  #include "wimlib/util.h"
-#include <string.h>
  #include <pthread.h>
+#include <string.h>
+
+/* Number of hash buckets.  This can be changed, but it should be a power of 2
+ * so that the correct hash bucket can be selected using a fast bitwise AND.  */
+#define LZ_BT_HASH_LEN         (1 << 16)
  
-#define LZ_BT_HASH_BITS                16
-#define LZ_BT_HASH_SIZE                (1 << LZ_BT_HASH_BITS)
-#define LZ_BT_HASH_MASK                (LZ_BT_HASH_SIZE - 1)
-#define LZ_BT_DIGRAM_TAB_SIZE  (256 * 256)
+/* Number of bytes from which the hash code is computed at each position.  This
+ * can be changed, provided that lz_bt_hash() is updated as well.  */
+#define LZ_BT_HASH_BYTES   3
+
+/* Number of entries in the digram table.
+ *
+ * Note:  You rarely get length-2 matches if you use length-3 hashing.  But
+ * since binary trees are typically used for higher compression ratios than hash
+ * chains, it is helpful for this match-finder to find length-2 matches as well.
+ * Therefore this match-finder also uses a digram table to find length-2 matches
+ * when the minimum match length is 2.  */
+#define LZ_BT_DIGRAM_TAB_LEN   (256 * 256)
+
+struct lz_bt {
+       struct lz_mf base;
+       u32 *hash_tab;
+       u32 *digram_tab;
+       u32 *child_tab;
+       u32 next_hash;
+};
  
  static u32 crc32_table[256];
  static pthread_once_t crc32_table_filled = PTHREAD_ONCE_INIT;
@@ -48,14 +86,9 @@ crc32_init(void)
          }
  }
  
-/*
- * Compute the hash code for the next 3-byte sequence in the window.
- *
- * @p
- *     A pointer to the next 3-byte sequence in the window.
- *
- * Returns the resulting hash code.
- */
+/* This hash function is taken from the LZMA SDK.  It seems to work well.
+ *
+ * TODO: Maybe use the SSE4.2 CRC32 instruction when available?  */
  static inline u32
  lz_bt_hash(const u8 *p)
  {
@@ -65,89 +98,75 @@ lz_bt_hash(const u8 *p)
         hash ^= p[1];
         hash ^= (u32)p[2] << 8;
  
-       return hash & LZ_BT_HASH_MASK;
+       return hash % LZ_BT_HASH_LEN;
  }
  
-/*
- * Compute the number of bytes of memory that would be needed to initialize a
- * binary tree match-finder with the specified maximum window size.
- *
- * @max_window_size
- *     The maximum window size, in bytes, to query.
- *
- * Returns the number of bytes that would be allocated by lz_bt_init(),
- * excluding the size of the 'struct lz_bt' itself.
- */
-u64
-lz_bt_get_needed_memory(lz_bt_pos_t max_window_size)
+static void
+lz_bt_set_default_params(struct lz_mf_params *params)
  {
-       u64 len;
+       if (params->min_match_len == 0)
+               params->min_match_len = 2;
+
+       if (params->max_match_len == 0)
+               params->max_match_len = params->max_window_size;
  
-       len = LZ_BT_HASH_SIZE + LZ_BT_DIGRAM_TAB_SIZE;
-       len += 2 * (u64)max_window_size;
+       if (params->max_search_depth == 0)
+               params->max_search_depth = 50;
  
-       return len * sizeof(lz_bt_pos_t);
+       if (params->nice_match_len == 0)
+               params->nice_match_len = 24;
+
+       if (params->nice_match_len < params->min_match_len)
+               params->nice_match_len = params->min_match_len;
+
+       if (params->nice_match_len > params->max_match_len)
+               params->nice_match_len = params->max_match_len;
  }
  
-/*
- * Initialize a binary tree match-finder.
- *
- * @mf
- *     The match-finder structure to initialize.
- * @max_window_size
- *     The maximum window size that shall be supported by subsequent calls to
- *     lz_bt_load_window().
- * @min_match_len
- *     The minimum length of matches that shall be produced by subsequent calls
- *     to lz_bt_get_matches().  This must be at least 2.
- * @max_match_len
- *     The maximum length of matches that shall be produced by subsequent calls
- *     to lz_bt_get_matches().  This must be at least @min_match_len.
- * @num_fast_bytes
- *     The maximum length of matches that shall be produced just using the
- *     binary tree search algorithm.  If the longest match has this length,
- *     then lz_bt_get_matches() will extend it up to @max_match_len.  This must
- *     be at least @min_match_len and no more than @max_match_len.
- * @max_search_depth
- *     The maximum depth to descend into the binary search tree before halting
- *     the search.
- *
- * Returns %true if successful; %false if out of memory.
- */
-bool
-lz_bt_init(struct lz_bt *mf,
-          lz_bt_pos_t max_window_size,
-          lz_bt_len_t min_match_len,
-          lz_bt_len_t max_match_len,
-          lz_bt_len_t num_fast_bytes,
-          u32 max_search_depth)
+static bool
+lz_bt_params_valid(const struct lz_mf_params *params)
+{
+       return true;
+}
+
+static u64
+lz_bt_get_needed_memory(u32 max_window_size)
  {
-       u64 len;
+       u64 len = 0;
+
+       len += LZ_BT_HASH_LEN;           /* hash_tab */
+       len += LZ_BT_DIGRAM_TAB_LEN;     /* digram_tab */
+       len += 2 * (u64)max_window_size; /* child_tab */
  
-       /* Check and set parameters.  */
-       LZ_ASSERT(min_match_len >= 2);
-       LZ_ASSERT(max_match_len >= min_match_len);
-       LZ_ASSERT(num_fast_bytes >= min_match_len);
-       LZ_ASSERT(num_fast_bytes <= max_match_len);
+       return len * sizeof(u32);
+}
+
+static bool
+lz_bt_init(struct lz_mf *_mf)
+{
+       struct lz_bt *mf = (struct lz_bt *)_mf;
+       struct lz_mf_params *params = &mf->base.params;
+       size_t len = 0;
  
-       mf->max_window_size = max_window_size;
-       mf->min_match_len = min_match_len;
-       mf->max_match_len = max_match_len;
-       mf->num_fast_bytes = num_fast_bytes;
-       mf->max_search_depth = max_search_depth;
+       lz_bt_set_default_params(params);
  
         /* Allocate space for 'hash_tab', 'digram_tab', and 'child_tab'.  */
-       len = LZ_BT_HASH_SIZE + (2 * (u64)max_window_size);
-       if (mf->min_match_len <= 2)
-               len += LZ_BT_DIGRAM_TAB_SIZE;
-       len *= sizeof(lz_bt_pos_t);
-       if ((size_t)len != len || !(mf->hash_tab = MALLOC(len)))
+
+       len += LZ_BT_HASH_LEN;
+       if (params->min_match_len == 2)
+               len += LZ_BT_DIGRAM_TAB_LEN;
+       len += 2 * params->max_window_size;
+
+       mf->hash_tab = MALLOC(len * sizeof(u32));
+       if (!mf->hash_tab)
                 return false;
-       if (mf->min_match_len <= 2) {
-               mf->digram_tab = mf->hash_tab + LZ_BT_HASH_SIZE;
-               mf->child_tab = mf->digram_tab + LZ_BT_DIGRAM_TAB_SIZE;
+
+       if (params->min_match_len == 2) {
+               mf->digram_tab = mf->hash_tab + LZ_BT_HASH_LEN;
+               mf->child_tab = mf->digram_tab + LZ_BT_DIGRAM_TAB_LEN;
         } else {
-               mf->child_tab = mf->hash_tab + LZ_BT_HASH_SIZE;
+               mf->digram_tab = NULL;
+               mf->child_tab = mf->hash_tab + LZ_BT_HASH_LEN;
         }
  
         /* Fill in the CRC32 table if not done already.  */
@@ -156,47 +175,21 @@ lz_bt_init(struct lz_bt *mf,
         return true;
  }
  
-/*
- * Destroy a binary tree match-finder.
- *
- * @mf
- *     The match-finder structure to destroy.
- */
-void
-lz_bt_destroy(struct lz_bt *mf)
-{
-       FREE(mf->hash_tab);
-       /* mf->hash_tab shares storage with mf->digram_tab and mf->child_tab. */
-}
-
-/*
- * Load a window into a binary tree match-finder.
- *
- * @mf
- *     The match-finder structure into which to load the window.
- * @window
- *     Pointer to the window to load.  This memory must remain available,
- *     unmodified, while the match-finder is being used.
- * @window_size
- *     The size of the window, in bytes.  This can't be larger than the
- *     @max_window_size with which lz_bt_init() was called.
- */
-void
-lz_bt_load_window(struct lz_bt *mf, const u8 *window, lz_bt_pos_t window_size)
+static void
+lz_bt_load_window(struct lz_mf *_mf, const u8 window[], u32 size)
  {
-       LZ_ASSERT(window_size <= mf->max_window_size);
+       struct lz_bt *mf = (struct lz_bt *)_mf;
         size_t clear_len;
  
-       mf->cur_window = window;
-       mf->cur_window_pos = 0;
-       mf->cur_window_size = window_size;
+       /* Clear hash_tab and digram_tab.
+        * Note: child_tab need not be cleared.  */
+       clear_len = LZ_BT_HASH_LEN;
+       if (mf->digram_tab)
+               clear_len += LZ_BT_DIGRAM_TAB_LEN;
+       memset(mf->hash_tab, 0, clear_len * sizeof(u32));
  
-       /* Clear the hash and digram tables.
-        * Note: The child table need not be cleared.  */
-       clear_len = LZ_BT_HASH_SIZE;
-       if (mf->min_match_len <= 2)
-               clear_len += LZ_BT_DIGRAM_TAB_SIZE;
-       memset(mf->hash_tab, 0, clear_len * sizeof(lz_bt_pos_t));
+       if (size >= LZ_BT_HASH_BYTES)
+               mf->next_hash = lz_bt_hash(window);
  }
  
  /*
@@ -207,13 +200,6 @@ lz_bt_load_window(struct lz_bt *mf, const u8 *window, lz_bt_pos_t window_size)
   *     The window being searched.
   * @cur_window_pos
   *     The current position in the window.
- * @min_len
- *     Ignore matches shorter than this length.  This must be at least 1.
- * @max_len
- *     Don't produce any matches longer than this length.  If we find a match
- *     this long, terminate the search and return.
- * @max_depth
- *     Stop if we reach this depth in the binary tree.
   * @child_tab
   *     Table of child pointers for the binary tree.  The children of the node
   *     for position 'i' in the window are child_tab[i * 2] and child_tab[i*2 +
@@ -225,6 +211,13 @@ lz_bt_load_window(struct lz_bt *mf, const u8 *window, lz_bt_pos_t window_size)
   *     The position in the window at which the binary tree for the current hash
   *     code is rooted.  This can be 0, which indicates that the binary tree for
   *     the current hash code is empty.
+ * @min_len
+ *     Ignore matches shorter than this length.  This must be at least 1.
+ * @max_len
+ *     Don't produce any matches longer than this length.  If we find a match
+ *     this long, terminate the search and return.
+ * @max_search_depth
+ *     Stop if we reach this depth in the binary tree.
   * @matches
   *     The array in which to produce the matches.  The matches will be produced
   *     in order of increasing length and increasing offset.  No more than one
@@ -234,14 +227,14 @@ lz_bt_load_window(struct lz_bt *mf, const u8 *window, lz_bt_pos_t window_size)
   *
   * Returns the number of matches found and written to @matches.
   */
-static lz_bt_len_t
+static u32
  do_search(const u8 window[restrict],
-         const lz_bt_pos_t cur_window_pos,
-         const lz_bt_len_t min_len,
-         const lz_bt_len_t max_len,
-         const u32 max_depth,
-         lz_bt_pos_t child_tab[restrict],
-         lz_bt_pos_t cur_match,
+         const u32 cur_window_pos,
+         u32 child_tab[restrict],
+         u32 cur_match,
+         const u32 min_len,
+         const u32 max_len,
+         const u32 max_search_depth,
           struct lz_match matches[restrict])
  {
         /*
@@ -327,9 +320,9 @@ do_search(const u8 window[restrict],
          * In degenerate cases, the binary tree might become severely
          * unbalanced.  To prevent excessive running times, we stop immediately
          * (and return any matches that happen to have been found so far) if the
-        * current depth exceeds @max_depth.  Note that this cutoff can occur
-        * before the longest match has been found, which is usually bad for the
-        * compression ratio.
+        * current depth exceeds @max_search_depth.  Note that this cutoff can
+        * occur before the longest match has been found, which is usually bad
+        * for the compression ratio.
          *
          * ---------------------------------------------------------------------
          *
@@ -397,17 +390,17 @@ do_search(const u8 window[restrict],
          * contain all valid positions.
          */
  
-       lz_bt_len_t num_matches = 0;
-       lz_bt_len_t longest_lt_match_len = 0;
-       lz_bt_len_t longest_gt_match_len = 0;
-       lz_bt_len_t longest_match_len = min_len - 1;
-       lz_bt_pos_t *pending_lt_ptr = &child_tab[cur_window_pos * 2 + 0];
-       lz_bt_pos_t *pending_gt_ptr = &child_tab[cur_window_pos * 2 + 1];
+       u32 num_matches = 0;
+       u32 longest_lt_match_len = 0;
+       u32 longest_gt_match_len = 0;
+       u32 longest_match_len = min_len - 1;
+       u32 *pending_lt_ptr = &child_tab[cur_window_pos * 2 + 0];
+       u32 *pending_gt_ptr = &child_tab[cur_window_pos * 2 + 1];
         const u8 *strptr = &window[cur_window_pos];
-       u32 depth_remaining = max_depth;
+       u32 depth_remaining = max_search_depth;
         for (;;) {
                 const u8 *matchptr;
-               lz_bt_len_t len;
+               u32 len;
  
                 if (depth_remaining-- == 0 || cur_match == 0) {
                         *pending_lt_ptr = 0;
@@ -429,7 +422,7 @@ do_search(const u8 window[restrict],
  
                                 matches[num_matches++] = (struct lz_match) {
                                         .len = len,
-                                       .offset = cur_window_pos - cur_match,
+                                       .offset = strptr - matchptr,
                                 };
  
                                 if (len == max_len) {
@@ -454,146 +447,84 @@ do_search(const u8 window[restrict],
         }
  }
  
-/*
- * Retrieve a list of matches at the next position in the window.
- *
- * @mf
- *     The binary tree match-finder structure into which a window has been
- *     loaded using lz_bt_load_window().
- * @matches
- *     The array into which the matches will be returned.  The length of this
- *     array must be at least (@mf->num_fast_bytes - @mf->min_match_len + 1).
- *
- * The return value is the number of matches that were found and stored in the
- * 'matches' array.  The matches will be ordered by strictly increasing length
- * and strictly increasing offset.  No match shall have length less than
- * @min_match_len, and no match shall have length greater than @max_match_len.
- * The return value may be 0, which indicates that no matches were found.
- *
- * On completion, the binary tree match-finder is advanced to the next position
- * in the window.
- */
-lz_bt_len_t
-lz_bt_get_matches(struct lz_bt *mf, struct lz_match matches[])
+static u32
+lz_bt_get_matches(struct lz_mf *_mf, struct lz_match matches[])
  {
-       lz_bt_pos_t bytes_remaining;
-       lz_bt_len_t num_matches;
-       lz_bt_pos_t cur_match;
+       struct lz_bt *mf = (struct lz_bt *)_mf;
+       const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
         u32 hash;
+       u32 cur_match;
+       u32 min_len;
+       u32 num_matches = 0;
  
-       LZ_ASSERT(mf->cur_window_pos < mf->cur_window_size);
-
-       bytes_remaining = lz_bt_get_remaining_size(mf);
-
-       /* If there are fewer than 3 bytes remaining, we can't even compute a
-        * hash to look up a binary tree root.  If there are exactly 2 bytes
-        * remaining we could still search for a length-2 match using the digram
-        * table, but it's not worth bothering.  (Note: this is also useful for
-        * LZX, since this excludes the length 2 match having the maximum
-        * offset, which isn't allowed.)  */
-       if (bytes_remaining < 3) {
-               mf->cur_window_pos++;
-               return 0;
-       }
-
-       num_matches = 0;
+       if (bytes_remaining <= LZ_BT_HASH_BYTES)
+               goto out;
  
-       /* Search the digram table for a length 2 match.  */
-       if (mf->min_match_len <= 2) {
-               u8 c1, c2;
-               u16 digram;
+       if (mf->digram_tab) {
+               /* Search the digram table for a length 2 match.  */
  
-               c1 = mf->cur_window[mf->cur_window_pos];
-               c2 = mf->cur_window[mf->cur_window_pos + 1];
-               digram = (u16)c1 | ((u16)c2 << 8);
+               const u16 digram = *(const u16 *)lz_mf_get_window_ptr(&mf->base);
                 cur_match = mf->digram_tab[digram];
-               mf->digram_tab[digram] = mf->cur_window_pos;
+               mf->digram_tab[digram] = mf->base.cur_window_pos;
  
                 /* We're only interested in matches of length exactly 2, since
-                * those won't be found during the binary tree search.  */
-               if (cur_match != 0 && mf->cur_window[cur_match + 2] !=
-                                     mf->cur_window[mf->cur_window_pos + 2])
+                * those won't be found during the binary tree search.
+                *
+                * Note: it's possible to extend this match as much as possible,
+                * then use its length plus 1 as min_len for the binary tree
+                * search.  However I found this actually *reduced* performance
+                * slightly, evidently because the binary tree still needs to be
+                * searched/updated starting from the root in either case.  */
+               if (cur_match != 0 &&
+                   (mf->base.cur_window[cur_match + 2] !=
+                    mf->base.cur_window[mf->base.cur_window_pos + 2]))
                 {
                         matches[num_matches++] = (struct lz_match) {
                                 .len = 2,
-                               .offset = mf->cur_window_pos - cur_match,
+                               .offset = mf->base.cur_window_pos - cur_match,
                         };
                 }
+               min_len = 3;
+       } else {
+               min_len = mf->base.params.min_match_len;
         }
  
-       /* Hash the length-3 byte sequence beginning at the current position in
-        * the window.  */
-       hash = lz_bt_hash(&mf->cur_window[mf->cur_window_pos]);
-
-       /* The corresponding hash bucket in 'hash_tab' contains the root of the
-        * binary tree of previous window positions that have the same hash
-        * code.  */
+       hash = mf->next_hash;
+       mf->next_hash = lz_bt_hash(lz_mf_get_window_ptr(&mf->base) + 1);
+       prefetch(&mf->hash_tab[mf->next_hash]);
         cur_match = mf->hash_tab[hash];
+       mf->hash_tab[hash] = mf->base.cur_window_pos;
  
-       /* Update the hash bucket to point to the binary tree rooted at the
-        * current position, which we will construct in do_search().  */
-       mf->hash_tab[hash] = mf->cur_window_pos;
-
-       /* Search the binary tree for matches.  At the same time, build the
-        * binary tree rooted at the current position, which replaces the one we
-        * search.  */
-       num_matches += do_search(mf->cur_window,
-                                mf->cur_window_pos,
-                                max(3, mf->min_match_len),
-                                min(bytes_remaining, mf->num_fast_bytes),
-                                mf->max_search_depth,
+       /* Search the binary tree of 'hash' for matches while re-rooting it at
+        * the current position.  */
+       num_matches += do_search(mf->base.cur_window,
+                                mf->base.cur_window_pos,
                                  mf->child_tab,
                                  cur_match,
+                                min_len,
+                                min(bytes_remaining, mf->base.params.nice_match_len),
+                                mf->base.params.max_search_depth,
                                  &matches[num_matches]);
  
-       /* If the longest match is @num_fast_bytes in length, it may have been
+       /* If the longest match is @nice_match_len in length, it may have been
          * truncated.  Try extending it up to the maximum match length.  */
-       if (num_matches != 0 && matches[num_matches - 1].len == mf->num_fast_bytes) {
-               lz_bt_pos_t limit;
-               const u8 *strptr, *matchptr;
-               lz_bt_len_t len;
-
-               limit = min(bytes_remaining, mf->max_match_len);
-               strptr = &mf->cur_window[mf->cur_window_pos];
-               matchptr = strptr - matches[num_matches - 1].offset;
+       if (num_matches != 0 &&
+           matches[num_matches - 1].len == mf->base.params.nice_match_len)
+       {
+               const u8 * const strptr = lz_mf_get_window_ptr(&mf->base);
+               const u8 * const matchptr = strptr - matches[num_matches - 1].offset;
+               const u32 len_limit = min(bytes_remaining, mf->base.params.max_match_len);
+               u32 len;
+
                 len = matches[num_matches - 1].len;
-               while (len < limit && strptr[len] == matchptr[len])
+               while (len < len_limit && strptr[len] == matchptr[len])
                         len++;
                 matches[num_matches - 1].len = len;
         }
  
-#ifdef ENABLE_LZ_DEBUG
-       /* Check the matches.  */
-       for (lz_bt_len_t i = 0; i < num_matches; i++) {
-               const u8 *matchptr, *strptr;
-
-               /* Length valid?  */
-               LZ_ASSERT(matches[i].len >= mf->min_match_len);
-               LZ_ASSERT(matches[i].len <= min(mf->max_match_len, bytes_remaining));
-
-               /* Offset valid?  */
-               LZ_ASSERT(matches[i].offset >= 1);
-               LZ_ASSERT(matches[i].offset <= lz_bt_get_position(mf));
-
-               /* Lengths and offsets strictly increasing?  */
-               if (i > 0) {
-                       LZ_ASSERT(matches[i].len > matches[i - 1].len);
-                       LZ_ASSERT(matches[i].offset > matches[i - 1].offset);
-               }
-
-               /* Actually a match?  */
-               strptr = lz_bt_get_window_ptr(mf);
-               matchptr = strptr - matches[i].offset;
-               LZ_ASSERT(!memcmp(strptr, matchptr, matches[i].len));
-
-               /* Match can't be extended further?  */
-               LZ_ASSERT(matches[i].len == min(mf->max_match_len, bytes_remaining) ||
-                         strptr[matches[i].len] != matchptr[matches[i].len]);
-       }
-#endif /* ENABLE_LZ_DEBUG  */
-
-       /* Advance to the next position in the window.  */
-       mf->cur_window_pos++;
+out:
+       /* Advance to the next position.  */
+       mf->base.cur_window_pos++;
  
         /* Return the number of matches found.  */
         return num_matches;
@@ -603,20 +534,21 @@ lz_bt_get_matches(struct lz_bt *mf, struct lz_match matches[])
   * See do_search() for explanatory comments.  */
  static void
  do_skip(const u8 window[restrict],
-       const lz_bt_pos_t cur_window_pos,
-       const lz_bt_len_t max_len,
-       u32 depth_remaining,
-       lz_bt_pos_t child_tab[restrict],
-       lz_bt_pos_t cur_match)
+       const u32 cur_window_pos,
+       u32 child_tab[restrict],
+       u32 cur_match,
+       const u32 max_len,
+       const u32 max_search_depth)
  {
-       lz_bt_len_t longest_lt_match_len = 0;
-       lz_bt_len_t longest_gt_match_len = 0;
-       lz_bt_pos_t *pending_lt_ptr = &child_tab[cur_window_pos * 2 + 0];
-       lz_bt_pos_t *pending_gt_ptr = &child_tab[cur_window_pos * 2 + 1];
+       u32 longest_lt_match_len = 0;
+       u32 longest_gt_match_len = 0;
+       u32 *pending_lt_ptr = &child_tab[cur_window_pos * 2 + 0];
+       u32 *pending_gt_ptr = &child_tab[cur_window_pos * 2 + 1];
         const u8 * const strptr = &window[cur_window_pos];
+       u32 depth_remaining = max_search_depth;
         for (;;) {
                 const u8 *matchptr;
-               lz_bt_len_t len;
+               u32 len;
  
                 if (depth_remaining-- == 0 || cur_match == 0) {
                         *pending_lt_ptr = 0;
@@ -650,57 +582,68 @@ do_skip(const u8 window[restrict],
         }
  }
  
-/* Skip the current position in the binary tree match-finder.  */
  static void
  lz_bt_skip_position(struct lz_bt *mf)
  {
-       lz_bt_pos_t bytes_remaining;
+       const u32 bytes_remaining = lz_mf_get_bytes_remaining(&mf->base);
         u32 hash;
-       lz_bt_pos_t cur_match;
-
-       LZ_ASSERT(mf->cur_window_pos < mf->cur_window_size);
+       u32 cur_match;
  
-       bytes_remaining = lz_bt_get_remaining_size(mf);
-
-       /* As explained in lz_bt_get_matches(), we don't search for matches if
-        * there are fewer than 3 bytes remaining in the window.  */
-       if (bytes_remaining < 3) {
-               mf->cur_window_pos++;
-               return;
-       }
+       if (bytes_remaining <= LZ_BT_HASH_BYTES)
+               goto out;
  
         /* Update the digram table.  */
-       if (mf->min_match_len <= 2) {
-               u8 c1, c2;
-               u16 digram;
-
-               c1 = mf->cur_window[mf->cur_window_pos];
-               c2 = mf->cur_window[mf->cur_window_pos + 1];
-               digram = (u16)c1 | ((u16)c2 << 8);
-               mf->digram_tab[digram] = mf->cur_window_pos;
+       if (mf->digram_tab) {
+               const u16 digram = *(const u16 *)lz_mf_get_window_ptr(&mf->base);
+               mf->digram_tab[digram] = mf->base.cur_window_pos;
         }
  
         /* Update the hash table.  */
-       hash = lz_bt_hash(&mf->cur_window[mf->cur_window_pos]);
+       hash = mf->next_hash;
+       mf->next_hash = lz_bt_hash(lz_mf_get_window_ptr(&mf->base) + 1);
+       prefetch(&mf->hash_tab[mf->next_hash]);
         cur_match = mf->hash_tab[hash];
-       mf->hash_tab[hash] = mf->cur_window_pos;
+       mf->hash_tab[hash] = mf->base.cur_window_pos;
  
         /* Update the binary tree for the appropriate hash code.  */
-       do_skip(mf->cur_window,
-               mf->cur_window_pos,
-               min(bytes_remaining, mf->num_fast_bytes),
-               mf->max_search_depth,
+       do_skip(mf->base.cur_window,
+               mf->base.cur_window_pos,
                 mf->child_tab,
-               cur_match);
+               cur_match,
+               min(bytes_remaining, mf->base.params.nice_match_len),
+               mf->base.params.max_search_depth);
  
+out:
         /* Advance to the next position.  */
-       mf->cur_window_pos++;
+       mf->base.cur_window_pos++;
  }
  
-/* Skip 'n' positions in the binary tree match-finder.  */
-void
-lz_bt_skip_positions(struct lz_bt *mf, unsigned n)
+static void
+lz_bt_skip_positions(struct lz_mf *_mf, u32 n)
  {
-       while (n--)
+       struct lz_bt *mf = (struct lz_bt *)_mf;
+
+       do {
                 lz_bt_skip_position(mf);
+       } while (--n);
+}
+
+static void
+lz_bt_destroy(struct lz_mf *_mf)
+{
+       struct lz_bt *mf = (struct lz_bt *)_mf;
+
+       FREE(mf->hash_tab);
+       /* mf->hash_tab shares storage with mf->digram_tab and mf->child_tab. */
  }
+
+const struct lz_mf_ops lz_binary_trees_ops = {
+       .params_valid      = lz_bt_params_valid,
+       .get_needed_memory = lz_bt_get_needed_memory,
+       .init              = lz_bt_init,
+       .load_window       = lz_bt_load_window,
+       .get_matches       = lz_bt_get_matches,
+       .skip_positions    = lz_bt_skip_positions,
+       .destroy           = lz_bt_destroy,
+       .struct_size       = sizeof(struct lz_bt),
+};
diff --git a/src/lz_brute_force.c b/src/lz_brute_force.c

new file mode 100644 (file)

index 0000000..3d186cc
--- /dev/null
+++ b/src/lz_brute_force.c
@@ -0,0 +1,161 @@
+/*
+ * lz_brute_force.c
+ *
+ * Brute force match-finder for Lempel-Ziv compression.
+ *
+ * Copyright (c) 2014 Eric Biggers.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "wimlib/lz_mf.h"
+#include "wimlib/util.h"
+
+static bool
+lz_bf_params_valid(const struct lz_mf_params *params)
+{
+       return true;
+}
+
+static u64
+lz_bf_get_needed_memory(u32 max_window_size)
+{
+       return 0;
+}
+
+static bool
+lz_bf_init(struct lz_mf *mf)
+{
+       if (mf->params.min_match_len == 0)
+               mf->params.min_match_len = 2;
+
+       if (mf->params.max_match_len == 0)
+               mf->params.max_match_len = mf->params.max_window_size;
+
+       if (mf->params.max_search_depth == 0)
+               mf->params.max_search_depth = 32;
+
+       mf->params.max_search_depth = DIV_ROUND_UP(mf->params.max_search_depth, 8);
+
+       if (mf->params.nice_match_len == 0)
+               mf->params.nice_match_len = 24;
+
+       if (mf->params.nice_match_len < mf->params.min_match_len)
+               mf->params.nice_match_len = mf->params.min_match_len;
+
+       if (mf->params.nice_match_len > mf->params.max_match_len)
+               mf->params.nice_match_len = mf->params.max_match_len;
+
+       return true;
+}
+
+static void
+lz_bf_load_window(struct lz_mf *mf, const u8 window[], u32 size)
+{
+}
+
+static u32
+lz_bf_get_matches(struct lz_mf *mf, struct lz_match matches[])
+{
+       const u8 * const strptr = lz_mf_get_window_ptr(mf);
+       const u32 max_len = min(lz_mf_get_bytes_remaining(mf),
+                               mf->params.nice_match_len);
+       u32 best_len = mf->params.min_match_len - 1;
+       u32 num_matches = 0;
+       const u8 *matchptr = strptr;
+
+       if (best_len >= max_len)
+               goto out;
+
+       while (matchptr-- > mf->cur_window) {
+               if (matchptr[best_len] == strptr[best_len] &&
+                   matchptr[best_len - 1] == strptr[best_len - 1] &&
+                   matchptr[0] == strptr[0])
+               {
+                       u32 len = 0;
+
+                       while (++len != max_len)
+                               if (matchptr[len] != strptr[len])
+                                       break;
+
+                       if (len > best_len) {
+                               matches[num_matches++] = (struct lz_match) {
+                                       .len = len,
+                                       .offset = strptr - matchptr,
+                               };
+                               best_len = len;
+                               if (best_len == max_len)
+                                       break;
+                               if (num_matches == mf->params.max_search_depth)
+                                       break;
+                       }
+               }
+       }
+
+       /* If the longest match is @nice_match_len in length, it may have been
+        * truncated.  Try extending it up to the maximum match length.  */
+       if (num_matches != 0 &&
+           matches[num_matches - 1].len == mf->params.nice_match_len)
+       {
+               const u8 * const matchptr = strptr - matches[num_matches - 1].offset;
+               const u32 len_limit = min(lz_mf_get_bytes_remaining(mf),
+                                         mf->params.max_match_len);
+               u32 len;
+
+               len = matches[num_matches - 1].len;
+               while (len < len_limit && strptr[len] == matchptr[len])
+                       len++;
+               matches[num_matches - 1].len = len;
+       }
+
+out:
+       mf->cur_window_pos++;
+       return num_matches;
+}
+
+static void
+lz_bf_skip_positions(struct lz_mf *mf, u32 n)
+{
+       mf->cur_window_pos += n;
+}
+
+static void
+lz_bf_destroy(struct lz_mf *mf)
+{
+}
+
+const struct lz_mf_ops lz_brute_force_ops = {
+       .params_valid      = lz_bf_params_valid,
+       .get_needed_memory = lz_bf_get_needed_memory,
+       .init              = lz_bf_init,
+       .load_window       = lz_bf_load_window,
+       .get_matches       = lz_bf_get_matches,
+       .skip_positions    = lz_bf_skip_positions,
+       .destroy           = lz_bf_destroy,
+       .struct_size       = sizeof(struct lz_mf),
+};
diff --git a/src/lz_hash.c b/src/lz_hash.c

deleted file mode 100644 (file)

index af60c2e..0000000
--- a/src/lz_hash.c
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * lz_hash.c
- *
- * This file provides the code to analyze a buffer of uncompressed data for
- * matches, as per the LZ77 algorithm.  It uses a hash table to accelerate the
- * process.  This is based on code from zlib (v. 1.2.5).
- */
-
-/*
- * Copyright (C) 2012, 2013 Eric Biggers
- * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
- *
- * This file is part of wimlib, a library for working with WIM files.
- *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
- *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
- */
-
-#ifdef HAVE_CONFIG_H
-#  include <config.h>
-#endif
-
-#include "wimlib/lz_hash.h"
-#include "wimlib/util.h"
-
-#include <string.h>
-
-#define HASH_BITS      15
-#define HASH_SIZE      (1 << HASH_BITS)
-#define HASH_MASK      (HASH_SIZE - 1)
-#define HASH_SHIFT     5
-
-/* Hash function, based on code from zlib.  This function will update and return
- * the hash value @hash for the string ending on the additional input character
- * @c.  This function must be called for each consecutive character, because it
- * uses a running hash value rather than computing it separately for each
- * 3-character string.
- *
- * The AND operation guarantees that only 3 characters will affect the hash
- * value, so every identical 3-character string will have the same hash value.
- */
-static inline unsigned
-update_hash(unsigned hash, u8 c)
-{
-       return ((hash << HASH_SHIFT) ^ c) & HASH_MASK;
-}
-
-
-/* Insert a 3-character string at position @str_pos in @window and with hash
- * code @hash into the hash table described by @hash_tab and @prev_tab.  Based
- * on code from zlib.
- *
- * The hash table uses chains (linked lists) for the hash buckets, but there are
- * no real pointers involved.  Indexing `hash_tab' by hash value gives the index
- * within the window of the last string in the hash bucket.  To find the index
- * of the previous string in the hash chain, the `prev_tab' array is indexed by
- * the string index.  `prev_tab' can be indexed repeatedly by the string index
- * to walk through the hash chain, until the special index `0' is reached,
- * indicating the end of the hash chain.
- */
-static inline unsigned
-insert_string(u32 hash_tab[], u32 prev_tab[],
-             const u8 window[], unsigned str_pos,
-             unsigned hash)
-{
-       hash = update_hash(hash, window[str_pos + 2]);
-       prev_tab[str_pos] = hash_tab[hash];
-       hash_tab[hash] = str_pos;
-       return hash;
-}
-
-
-/*
- * Returns the longest match for a given input position.
- *
- * @window:            The window of uncompressed data.
- * @bytes_remaining:   The number of bytes remaining in the window.
- * @strstart:          The index of the start of the string in the window that
- *                             we are trying to find a match for.
- * @prev_tab:          The array of prev pointers for the hash table.
- * @cur_match:         The index of the head of the hash chain for matches
- *                             having the hash value of the string beginning
- *                             at index @strstart.
- * @prev_len:          The length of the match that was found for the string
- *                             beginning at (@strstart - 1).
- * @match_start_ret:   A location into which the index of the start of the
- *                             match will be returned.
- * @params:            Parameters that affect how long the search will proceed
- *                             before going with the best that has been found
- *                             so far.
- * @min_start_pos:     If the chain reaches a match starting before this
- *                     position (including the end-of-chain 0), the search will
- *                     be terminated.
- *
- * Returns the length of the match that was found.
- */
-static unsigned
-longest_match(const u8 window[], unsigned bytes_remaining,
-             unsigned strstart, const u32 prev_tab[],
-             unsigned cur_match, unsigned prev_len,
-             unsigned *match_start_ret,
-             const struct lz_params *params,
-             unsigned min_start_pos)
-{
-       unsigned chain_len = params->max_chain_len;
-
-       const u8 *scan = window + strstart;
-       const u8 *match;
-       unsigned len;
-       unsigned best_len = prev_len;
-       unsigned match_start = cur_match;
-
-       unsigned nice_match = min(params->nice_match, bytes_remaining);
-
-       const u8 *strend = scan + min(params->max_match, bytes_remaining);
-
-       u8 scan_end1 = scan[best_len - 1];
-       u8 scan_end = scan[best_len];
-
-
-       /* Do not waste too much time if we already have a good match: */
-       if (best_len >= params->good_match)
-               chain_len >>= 2;
-
-       do {
-               match = &window[cur_match];
-
-               /* Skip to next match if the match length cannot increase or if
-                * the match length is less than 2.  Note that the checks below
-                * for insufficient lookahead only occur occasionally for
-                * performance reasons.  Therefore uninitialized memory will be
-                * accessed, and conditional jumps will be made that depend on
-                * those values.  However the length of the match is limited to
-                * the lookahead, so the output of lz_analyze_block() is not
-                * affected by the uninitialized values.  */
-
-               if (match[best_len] != scan_end
-                   || match[best_len - 1] != scan_end1
-                   || *match != *scan
-                   || *++match != scan[1])
-                       continue;
-               scan++;
-
-       #if 0
-               do {
-               } while (scan < strend && *++match == *++scan);
-       #else
-
-               do {
-               } while (
-                        *++match == *++scan && *++match == *++scan &&
-                        *++match == *++scan && *++match == *++scan &&
-                        *++match == *++scan && *++match == *++scan &&
-                        *++match == *++scan && *++match == *++scan &&
-                        scan < strend);
-       #endif
-               len = match - &window[cur_match];
-
-               scan = &window[strstart];
-
-               if (len > best_len) {
-                       match_start = cur_match;
-                       best_len = len;
-                       if (len >= nice_match)
-                               break;
-                       scan_end1  = scan[best_len - 1];
-                       scan_end   = scan[best_len];
-               }
-       } while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) >= min_start_pos);
-       *match_start_ret = match_start;
-       return min(min(best_len, bytes_remaining), params->max_match);
-}
-
-
-
-/*
- * Determines the sequence of matches and literals that a block of data will be
- * compressed to.
- *
- * @window:            The data that is to be compressed.
- * @window_size:       The length of @window, in bytes.
- * @record_match:      Consumer for matches.
- * @record_literal:    Consumer for literals.
- * @record_ctx:                Context passed to @record_match and @record_literal.
- * @params:            Structure that contains parameters that affect how the
- *                             analysis proceeds (mainly how good the matches
- *                             have to be).
- * @prev_tab:          Temporary space containing least @window_size elements.
- */
-void
-lz_analyze_block(const u8 window[restrict],
-                u32 window_size,
-                lz_record_match_t record_match,
-                lz_record_literal_t record_literal,
-                void *record_ctx,
-                const struct lz_params *params,
-                u32 prev_tab[restrict])
-{
-       unsigned cur_input_pos = 0;
-       unsigned hash          = 0;
-       unsigned hash_head     = 0;
-       unsigned prev_len      = params->min_match - 1;
-       unsigned prev_start;
-       unsigned match_len     = params->min_match - 1;
-       unsigned match_start   = 0;
-       bool match_available = false;
-       u32 hash_tab[HASH_SIZE];
-       unsigned min_start_pos = 1;
-
-       ZERO_ARRAY(hash_tab);
-
-       do {
-               /* If there are at least 3 characters remaining in the input,
-                * insert the 3-character string beginning at
-                * window[cur_input_pos] into the hash table.
-                *
-                * hash_head is set to the index of the previous string in the
-                * hash bucket, or 0 if there is no such string */
-               if (window_size - cur_input_pos >= params->min_match) {
-                       hash = insert_string(hash_tab, prev_tab,
-                                            window,
-                                            cur_input_pos, hash);
-                       hash_head = prev_tab[cur_input_pos];
-               } else {
-                       hash_head = 0;
-               }
-
-
-               /* Find the longest match, discarding those <= prev_len. */
-               prev_len = match_len;
-               prev_start = match_start;
-               match_len = params->min_match - 1;
-
-               if (cur_input_pos > params->max_offset)
-                       min_start_pos = cur_input_pos - params->max_offset;
-               else
-                       min_start_pos = 1;
-
-               if (hash_head >= min_start_pos &&
-                   prev_len < params->max_lazy_match)
-               {
-                       /* To simplify the code, we prevent matches with the
-                        * string of window index 0 (in particular we have to
-                        * avoid a match of the string with itself at the start
-                        * of the input file).  */
-                       match_len = longest_match(window,
-                                                 window_size - cur_input_pos,
-                                                 cur_input_pos, prev_tab,
-                                                 hash_head, prev_len,
-                                                 &match_start, params,
-                                                 min_start_pos);
-
-                       if (match_len == params->min_match &&
-                            cur_input_pos - match_start > params->too_far)
-                               match_len = params->min_match - 1;
-               }
-
-               /* If there was a match at the previous step and the current
-                * match is not better, output the previous match:
-                */
-               if (prev_len >= params->min_match && match_len <= prev_len) {
-
-
-                       (*record_match)(prev_len,
-                                       cur_input_pos - 1 - prev_start,
-                                       record_ctx);
-
-                       /* Insert in hash table all strings up to the end of the
-                        * match.  strstart-1 and strstart are already inserted.
-                        * If there is not enough lookahead, the last two
-                        * strings are not inserted in the hash table.  */
-
-                       /* Do not insert strings in hash table beyond this. */
-                       unsigned max_insert = window_size - params->min_match;
-
-                       prev_len -= 2;
-
-                       do {
-                               if (++cur_input_pos <= max_insert) {
-                                       hash = insert_string(hash_tab, prev_tab,
-                                                            window,
-                                                            cur_input_pos,
-                                                            hash);
-                               }
-                       } while (--prev_len != 0);
-                       match_available = false;
-                       match_len = params->min_match - 1;
-               } else if (match_available) {
-                       /* If there was no match at the previous position,
-                        * output a single literal. If there was a match but the
-                        * current match is longer, truncate the previous match
-                        * to a single literal.  */
-                       (*record_literal)(window[cur_input_pos - 1], record_ctx);
-               } else {
-                       /* There is no previous match to compare with, wait for
-                        * the next step to decide.  */
-                       match_available = true;
-               }
-       } while (++cur_input_pos < window_size);
-
-       if (match_available)
-               (*record_literal)(window[cur_input_pos - 1], record_ctx);
-}
diff --git a/src/lz_hash_chains.c b/src/lz_hash_chains.c

new file mode 100644 (file)

index 0000000..7eacac9
--- /dev/null
+++ b/src/lz_hash_chains.c
@@ -0,0 +1,303 @@
+/*
+ * lz_hash_chains.c
+ *
+ * Hash chain match-finder for Lempel-Ziv compression.
+ *
+ * Copyright (c) 2014 Eric Biggers.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "wimlib/lz_mf.h"
+#include "wimlib/util.h"
+#include <pthread.h>
+#include <string.h>
+
+/* Number of hash buckets.  This can be changed, but should be a power of 2 so
+ * that the correct hash bucket can be selected using a fast bitwise AND.  */
+#define LZ_HC_HASH_LEN     (1 << 15)
+
+/* Number of bytes from which the hash code is computed at each position.  This
+ * can be changed, provided that lz_hc_hash() is updated as well.  */
+#define LZ_HC_HASH_BYTES   3
+
+/* State of the hash chain match-finder.  'base' is the first member because
+ * the callbacks in this file cast 'struct lz_mf *' to 'struct lz_hc *'.  */
+struct lz_hc {
+       /* Generic match-finder state (parameters, window, current position).  */
+       struct lz_mf base;
+       /* LZ_HC_HASH_LEN buckets.  Each holds the window position most recently
+        * inserted for that hash code; 0 is treated as "empty bucket".  */
+       u32 *hash_tab;
+       /* prev_tab[pos] is the position that was at the head of pos's bucket
+        * when pos was inserted, i.e. the next link of the chain.  Lives in the
+        * same allocation as 'hash_tab', directly after it.  */
+       u32 *prev_tab;
+       /* Hash code of the next position to be processed, computed one step
+        * ahead so that its bucket can be prefetched.  */
+       u32 next_hash;
+};
+
+/* Per-byte lookup table used by lz_hc_hash().  It is shared by all instances
+ * and filled by crc32_init(), which lz_hc_init() runs through pthread_once()
+ * using 'crc32_table_filled' as the once-control.  */
+static u32 crc32_table[256];
+static pthread_once_t crc32_table_filled = PTHREAD_ONCE_INIT;
+
+/* Fill crc32_table: entry b is the result of pushing byte value b through
+ * eight rounds of the right-shifting CRC step with polynomial 0xEDB88320.  */
+static void
+crc32_init(void)
+{
+        for (u32 byte = 0; byte < 256; byte++) {
+                u32 crc = byte;
+                int rounds_left = 8;
+
+                /* One round per bit: shift right, and fold in the polynomial
+                 * whenever the bit shifted out was set.  */
+                while (rounds_left--)
+                        crc = (crc & 1) ? (crc >> 1) ^ 0xEDB88320 : crc >> 1;
+
+                crc32_table[byte] = crc;
+        }
+}
+
+/* Map the three bytes starting at @p to a bucket index below LZ_HC_HASH_LEN.
+ * The first byte is scrambled through crc32_table, the second is mixed in
+ * as-is and the third is mixed in shifted left by 8 bits.
+ *
+ * This hash function is taken from the LZMA SDK.  It seems to work well.
+ *
+ * TODO: Maybe use the SSE4.2 CRC32 instruction when available?  */
+static inline u32
+lz_hc_hash(const u8 *p)
+{
+       const u32 mixed = crc32_table[p[0]] ^ p[1] ^ ((u32)p[2] << 8);
+
+       return mixed % LZ_HC_HASH_LEN;
+}
+
+/* Normalize @params in place for the hash chain match-finder: raise the
+ * minimum match length to the number of hashed bytes, replace zero-valued
+ * parameters with defaults, and bound nice_match_len by the match length
+ * limits.  */
+static void
+lz_hc_set_default_params(struct lz_mf_params *params)
+{
+       /* Positions are indexed by a hash of LZ_HC_HASH_BYTES bytes, so that is
+        * the smallest minimum match length accepted here.  */
+       if (LZ_HC_HASH_BYTES > params->min_match_len)
+               params->min_match_len = LZ_HC_HASH_BYTES;
+
+       /* A value of zero selects the default for each of these.  */
+       if (!params->max_match_len)
+               params->max_match_len = params->max_window_size;
+       if (!params->max_search_depth)
+               params->max_search_depth = 50;
+       if (!params->nice_match_len)
+               params->nice_match_len = 24;
+
+       /* Bound nice_match_len from below, then from above.  The upper bound is
+        * applied last, so it wins if the two limits are crossed.  */
+       if (params->min_match_len > params->nice_match_len)
+               params->nice_match_len = params->min_match_len;
+       if (params->max_match_len < params->nice_match_len)
+               params->nice_match_len = params->max_match_len;
+}
+
+/* Return true if @_params is usable by the hash chain match-finder.  Defaults
+ * are applied to a local copy, so the caller's structure is not modified.  */
+static bool
+lz_hc_params_valid(const struct lz_mf_params *_params)
+{
+       struct lz_mf_params effective = *_params;
+
+       lz_hc_set_default_params(&effective);
+
+       /* After defaulting, the minimum may still exceed the maximum, e.g.
+        * min_match_len = 3 with max_match_len = 2.  Reject that.  */
+       if (effective.min_match_len > effective.max_match_len)
+               return false;
+
+       return true;
+}
+
+/* Return the number of bytes needed for the match-finder's tables: one u32 per
+ * hash bucket plus one u32 per window position.  The sum is formed in 64 bits
+ * before scaling.  */
+static u64
+lz_hc_get_needed_memory(u32 max_window_size)
+{
+       const u64 num_entries = (u64)LZ_HC_HASH_LEN + max_window_size;
+
+       return num_entries * sizeof(u32);
+}
+
+/* 'init' callback: normalize the parameters, allocate the hash table and chain
+ * table as one contiguous block, and make sure the CRC32 table is filled.
+ *
+ * Returns true on success.  Returns false, with 'hash_tab' set to NULL, if the
+ * tables cannot be allocated.  */
+static bool
+lz_hc_init(struct lz_mf *_mf)
+{
+       struct lz_hc *mf = (struct lz_hc *)_mf;
+       u64 needed;
+
+       lz_hc_set_default_params(&mf->base.params);
+
+       /* Allocate space for 'hash_tab' and 'prev_tab'.  */
+
+       needed = lz_hc_get_needed_memory(mf->base.params.max_window_size);
+
+       /* The size is computed in 64 bits.  Where size_t is narrower, handing
+        * it to the allocator unchecked would silently truncate it and yield a
+        * block smaller than the tables that are indexed later.  */
+       if ((size_t)needed != needed) {
+               mf->hash_tab = NULL;
+               return false;
+       }
+
+       mf->hash_tab = MALLOC((size_t)needed);
+       if (!mf->hash_tab)
+               return false;
+
+       /* 'prev_tab' occupies the remainder of the block.  */
+       mf->prev_tab = mf->hash_tab + LZ_HC_HASH_LEN;
+
+       /* Fill in the CRC32 table if not done already.  */
+       pthread_once(&crc32_table_filled, crc32_init);
+
+       return true;
+}
+
+/* 'load_window' callback: empty every hash bucket and, when the window holds
+ * at least LZ_HC_HASH_BYTES bytes, precompute the hash code of its first
+ * position.  'prev_tab' is not cleared here.  */
+static void
+lz_hc_load_window(struct lz_mf *_mf, const u8 window[], u32 size)
+{
+       struct lz_hc *mf = (struct lz_hc *)_mf;
+
+       memset(mf->hash_tab, 0, LZ_HC_HASH_LEN * sizeof(mf->hash_tab[0]));
+
+       /* Too short to hash even the first position.  */
+       if (size < LZ_HC_HASH_BYTES)
+               return;
+
+       mf->next_hash = lz_hc_hash(window);
+}
+
+/*
+ * Walk one hash chain and collect matches for the string at @cur_window_pos.
+ *
+ * @window:            Start of the window.
+ * @cur_window_pos:    Position of the string to find matches for.
+ * @prev_tab:          Chain links; prev_tab[pos] is the next candidate after
+ *                     pos.
+ * @cur_match:         First candidate position.  0 terminates the chain, so a
+ *                     match starting at window position 0 is never reported.
+ * @min_len:           Shortest match length to report.  Must be at least 2,
+ *                     because the byte at index (min_len - 2) is inspected.
+ * @max_len:           Longest match length to report; no more than @max_len
+ *                     bytes of a candidate are ever compared.
+ * @max_search_depth:  Maximum number of candidates to examine.
+ * @matches:           Output array.  Matches are stored in order of strictly
+ *                     increasing length.
+ *
+ * Returns the number of matches stored in @matches.
+ */
+static inline u32
+do_search(const u8 * restrict window,
+         const u32 cur_window_pos,
+         u32 prev_tab[restrict],
+         u32 cur_match,
+         const u32 min_len,
+         const u32 max_len,
+         const u32 max_search_depth,
+         struct lz_match matches[restrict])
+{
+       const u8 * const strptr = &window[cur_window_pos];
+       u32 best_len = min_len - 1;
+       u32 depth_remaining = max_search_depth;
+       u32 num_matches = 0;
+
+       /* If even the shortest reportable match would exceed @max_len, nothing
+        * can be reported.  Bail out now: the quick filter below reads
+        * strptr[best_len], which would lie outside the first @max_len bytes of
+        * the string (past the end of the window when @max_len was limited by
+        * the number of bytes remaining).  */
+       if (best_len >= max_len)
+               return 0;
+
+       for (; cur_match && depth_remaining--; cur_match = prev_tab[cur_match]) {
+
+               const u8 * const matchptr = &window[cur_match];
+
+               /* Quick filter: a candidate can only beat best_len if it agrees
+                * with the string at index best_len.  Checking that byte, its
+                * predecessor and the first byte rejects most candidates
+                * without running the full comparison.  */
+               if (matchptr[best_len] == strptr[best_len] &&
+                   matchptr[best_len - 1] == strptr[best_len - 1] &&
+                   matchptr[0] == strptr[0])
+               {
+                       u32 len = 0;
+
+                       /* Index 0 is already known to match.  */
+                       while (++len != max_len)
+                               if (matchptr[len] != strptr[len])
+                                       break;
+
+                       if (len > best_len) {
+                               matches[num_matches++] = (struct lz_match) {
+                                       .len = len,
+                                       .offset = strptr - matchptr,
+                               };
+                               best_len = len;
+                               /* Cannot be improved upon; stop searching.  */
+                               if (best_len == max_len)
+                                       break;
+                       }
+               }
+       }
+       return num_matches;
+}
+
+/* 'get_matches' callback: insert the current position at the head of its hash
+ * chain, search the rest of the chain for matches, then advance by one
+ * position.  Matches are written to @matches; the return value is how many
+ * were written.  No search is done, and 0 is returned, when
+ * LZ_HC_HASH_BYTES or fewer bytes remain.  */
+static u32
+lz_hc_get_matches(struct lz_mf *_mf, struct lz_match matches[])
+{
+       struct lz_hc *mf = (struct lz_hc *)_mf;
+       const u32 avail = lz_mf_get_bytes_remaining(&mf->base);
+       u32 count = 0;
+
+       /* The hash of the *next* position is computed below, which reads
+        * LZ_HC_HASH_BYTES bytes starting one byte ahead.  So strictly more
+        * than LZ_HC_HASH_BYTES bytes must remain.  */
+       if (avail > LZ_HC_HASH_BYTES) {
+               const u32 bucket = mf->next_hash;
+               u32 chain_head;
+
+               mf->next_hash = lz_hc_hash(lz_mf_get_window_ptr(&mf->base) + 1);
+               prefetch(&mf->hash_tab[mf->next_hash]);
+
+               /* Link the current position in front of the old chain head.  */
+               chain_head = mf->hash_tab[bucket];
+               mf->hash_tab[bucket] = mf->base.cur_window_pos;
+               mf->prev_tab[mf->base.cur_window_pos] = chain_head;
+
+               count = do_search(mf->base.cur_window,
+                                 mf->base.cur_window_pos,
+                                 mf->prev_tab,
+                                 chain_head,
+                                 mf->base.params.min_match_len,
+                                 min(avail, mf->base.params.nice_match_len),
+                                 mf->base.params.max_search_depth,
+                                 matches);
+
+               if (count != 0) {
+                       struct lz_match * const longest = &matches[count - 1];
+
+                       /* If the longest match is @nice_match_len in length, it
+                        * may have been truncated.  Try extending it up to the
+                        * maximum match length.  */
+                       if (longest->len == mf->base.params.nice_match_len) {
+                               const u8 * const strptr = lz_mf_get_window_ptr(&mf->base);
+                               const u8 * const matchptr = strptr - longest->offset;
+                               const u32 len_limit = min(avail, mf->base.params.max_match_len);
+                               u32 len;
+
+                               len = longest->len;
+                               while (len < len_limit && strptr[len] == matchptr[len])
+                                       len++;
+                               longest->len = len;
+                       }
+               }
+       }
+
+       mf->base.cur_window_pos++;
+       return count;
+}
+
+/* Advance by one position without searching for matches.  The position is
+ * still inserted at the head of its hash chain, unless LZ_HC_HASH_BYTES or
+ * fewer bytes remain.  */
+static void
+lz_hc_skip_position(struct lz_hc *mf)
+{
+       const u32 avail = lz_mf_get_bytes_remaining(&mf->base);
+
+       if (avail > LZ_HC_HASH_BYTES) {
+               const u32 bucket = mf->next_hash;
+
+               /* Precompute and prefetch for the following position.  */
+               mf->next_hash = lz_hc_hash(lz_mf_get_window_ptr(&mf->base) + 1);
+               prefetch(&mf->hash_tab[mf->next_hash]);
+
+               /* Link the current position in front of the old chain head.  */
+               mf->prev_tab[mf->base.cur_window_pos] = mf->hash_tab[bucket];
+               mf->hash_tab[bucket] = mf->base.cur_window_pos;
+       }
+
+       mf->base.cur_window_pos++;
+}
+
+/* 'skip_positions' callback: advance past @n positions, inserting each one
+ * into its hash chain but searching none of them.
+ *
+ * The loop tests @n before each step so that n == 0 skips nothing.  A
+ * post-tested 'do { } while (--n)' loop would wrap the counter around and run
+ * roughly 2^32 times in that case.  */
+static void
+lz_hc_skip_positions(struct lz_mf *_mf, u32 n)
+{
+       struct lz_hc *mf = (struct lz_hc *)_mf;
+
+       while (n--)
+               lz_hc_skip_position(mf);
+}
+
+/* 'destroy' callback: free the table block.  'prev_tab' points into the block
+ * that starts at 'hash_tab', so this single FREE() covers both tables.  */
+static void
+lz_hc_destroy(struct lz_mf *_mf)
+{
+       FREE(((struct lz_hc *)_mf)->hash_tab);
+}
+
+/* Operations table that exposes the hash chain match-finder through the
+ * generic 'struct lz_mf_ops' interface.  'struct_size' is that of
+ * 'struct lz_hc', which embeds 'struct lz_mf' as its first member.  */
+const struct lz_mf_ops lz_hash_chains_ops = {
+       .params_valid      = lz_hc_params_valid,
+       .get_needed_memory = lz_hc_get_needed_memory,
+       .init              = lz_hc_init,
+       .load_window       = lz_hc_load_window,
+       .get_matches       = lz_hc_get_matches,
+       .skip_positions    = lz_hc_skip_positions,
+       .destroy           = lz_hc_destroy,
+       .struct_size       = sizeof(struct lz_hc),
+};
diff --git a/src/lz_lcp_interval_tree.c b/src/lz_lcp_interval_tree.c

new file mode 100644 (file)

index 0000000..a6c72d5
--- /dev/null
+++ b/src/lz_lcp_interval_tree.c
@@ -0,0 +1,576 @@
+/*
+ * lz_lcp_interval_tree.c
+ *
+ * A match-finder for Lempel-Ziv compression based on bottom-up construction and
+ * traversal of the Longest Common Prefix (LCP) interval tree.
+ *
+ * Copyright (c) 2014 Eric Biggers.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "wimlib/lz_mf.h"
+#include "wimlib/lz_suffix_array_utils.h"
+#include "wimlib/util.h"
+
+/*
+ * To save space, we pack lcp (longest common prefix) and position values into
+ * 32-bit integers.  Therefore, we must divide the 32 bits into lcp and position
+ * bits.  6 lcp bits seems to be a good value, since matches of length 64 are
+ * sufficiently long so that the compression ratio isn't hurt much by choosing
+ * one such match over another.  We also use 1 bit to mark intervals as "not yet
+ * visited".  This leaves 25 bits, which when used for position results in a
+ * maximum window size of 33554432 bytes.
+ */
+#define LZ_LCPIT_LCP_BITS              6
+#define LZ_LCPIT_LCP_MASK              ((1 << LZ_LCPIT_LCP_BITS) - 1)
+#define LZ_LCPIT_LCP_MAX               LZ_LCPIT_LCP_MASK
+#define LZ_LCPIT_POS_BITS              (32 - 1 - LZ_LCPIT_LCP_BITS)
+#define LZ_LCPIT_MAX_WINDOW_SIZE       (1UL << LZ_LCPIT_POS_BITS)
+
+/* State of the LCP-interval-tree match-finder: the generic match-finder
+ * state followed by pointers to three u32 arrays.  */
+struct lz_lcpit {
+       /* Generic match-finder state.  */
+       struct lz_mf base;
+
+       /* Each of the arrays has length equal to the window size.  This
+        * therefore results in an additional memory usage of 12 bytes per
+        * position.  (That's compared to about 8 for binary trees or 4 for hash
+        * chains, for example.)
+        *
+        * We allocate these arrays in one contiguous block.  'SA' is first,
+        * 'intervals' is second, and 'pos_data' is third.  */
+
+       /* Pointer to the suffix array  */
+       u32 *SA;
+
+       /* Mapping: lcp-interval index => lcp-interval data
+        *
+        * Initially, the lcp-interval data for an lcp-interval contains that
+        * interval's lcp and superinterval index.
+        *
+        * After a lcp-interval is visited during match-finding, its
+        * lcp-interval data contains that interval's lcp and the position of
+        * the next suffix to consider as a match when matching against that
+        * lcp-interval.  */
+       u32 *intervals;
+
+       /* Mapping: suffix index ("window position") => lcp-interval index  */
+       u32 *pos_data;
+};
+
+/*
+ * Use the suffix array accompanied with the longest-common-prefix array --- in
+ * other words, the "enhanced suffix array" --- to simulate a bottom-up
+ * traversal of the corresponding suffix tree, or equivalently the "lcp-interval
+ * tree", as described in Abouelhoda et al. (2004).
+ *
+ * While doing the traversal, create a table 'intervals' that contains data for
+ * each lcp-interval, specifically the lcp value of that interval, and the index
+ * of the superinterval.
+ *
+ * Also while doing the traversal, create a table 'pos_data' that contains a
+ * mapping from suffix index to the deepest lcp-interval containing it.
+ *
+ * The result is that we will later be able to do match-finding at a given
+ * position by looking up that position in 'pos_data' to get the deepest
+ * lcp-interval containing the corresponding suffix, then proceeding to the
+ * superintervals.  See lz_lcpit_get_matches() for more details.
+ *
+ * Note: We limit the depth of the lcp-interval tree by capping the lcp at
+ * LZ_LCPIT_LCP_MAX.  This can cause a sub-tree of intervals with lcp greater
+ * than LZ_LCPIT_LCP_MAX to be collapsed into a single interval with lcp
+ * LZ_LCPIT_LCP_MAX.  This avoids degenerate cases and does not hurt
+ * match-finding very much, since if we find a match of length LZ_LCPIT_LCP_MAX
+ * and extend it as far as possible, that's usually good enough because that
+ * region of the input must already be highly compressible.
+ *
+ * References:
+ *
+ *     M.I. Abouelhoda, S. Kurtz, E. Ohlebusch.  2004.  Replacing Suffix Trees
+ *     With Enhanced Suffix Arrays.  Journal of Discrete Algorithms Volume 2
+ *     Issue 1, March 2004, pp. 53-86.
+ *
+ *     G. Chen, S.J. Puglisi, W.F. Smyth.  2008.  Lempel-Ziv Factorization
+ *     Using Less Time & Space.  Mathematics in Computer Science June 2008,
+ *     Volume 1, Issue 4, pp. 605-623.
+ *
+ *     Kasai et al. Linear-Time Longest-Common-Prefix Computation in Suffix
+ *     Arrays and Its Applications.  2001.  CPM '01 Proceedings of the 12th
+ *     Annual Symposium on Combinatorial Pattern Matching pp. 181-192.
+ */
+static void
+build_LCPIT(const u32 SA[restrict], u32 LCP[restrict],
+           u32 pos_data[restrict], const u32 lcp_limit, const u32 n)
+{
+       u32 *intervals = LCP;
+       u32 next_interval;
+       u32 incomplete_intervals[lcp_limit + 1];
+       u32 *cur_interval;
+       u32 prev_pos;
+
+       /* As we determine lcp-intervals, we assign each one an entry in
+        * 'intervals', overwriting LCP in the process.  Each such entry will
+        * contain the index in 'intervals' of the superinterval, along with the
+        * longest common prefix length that the suffixes in that interval
+        * share.
+        *
+        * Note: since we don't need its memory for anything, we don't overwrite
+        * the suffix array, even though this could potentially be done since
+        * it's not actually used during match-finding.  */
+
+       /* Process rank 0 as special case.  This creates the lcp-interval
+        * containing every suffix in the window.  */
+       prev_pos = SA[0];
+       intervals[0] = 0;
+       pos_data[prev_pos] = 0;
+       cur_interval = incomplete_intervals;
+       *cur_interval = 0;
+       next_interval = 1;
+
+       /* Iterate through each suffix array rank.  */
+       for (u32 r = 1; r < n; r++) {
+
+               /* Get the longest common prefix (lcp) between the suffixes with
+                * ranks r and r - 1.  But cap it to the lcp limit.  */
+               const u32 lcp = min(LCP[r], lcp_limit);
+
+               /* Convert rank => position using the suffix array.  */
+               const u32 pos = SA[r];
+
+               /* cur_interval points to the index of the deepest (highest lcp
+                * value) incomplete lcp-interval.  */
+
+               /*
+                * There are three cases:
+                *
+                * (1) The lcp stayed the same as the previous value.  Place the
+                * current suffix in cur_interval.  (This placement is
+                * tentative, because if LCP increases at the next rank, this
+                * suffix could still be placed in the resulting new LCP
+                * interval instead.)  cur_interval remains unchanged.
+                *
+                * (2) The lcp increased from the previous value.  This signals
+                * the beginning of a new lcp-interval.  Create it and push it
+                * onto the stack of incomplete intervals.  But since lcp is
+                * defined in terms of the longest prefix between this suffix
+                * and the *previous* ranked suffix, the new lcp-interval
+                * actually should have begun at the *previous* ranked suffix.
+                * Therefore, we need to set both pos_data[pos] and
+                * pos_data[prev_pos] to refer to the new interval.
+                *
+                * (3) The lcp decreased from the previous value.  This signals
+                * the termination of at least one lcp-interval.  Pop the stack,
+                * finalizing the lcp-intervals, until the current lcp is at
+                * least as large as the lcp associated with cur_interval.
+                * Then, if the current lcp is equal to the lcp associated with
+                * cur_interval, place the current suffix in cur_interval,
+                * similar to case (1).  Else, create a new lcp-interval,
+                * similar to case (2).
+                */
+
+               if (lcp == (intervals[*cur_interval] & LZ_LCPIT_LCP_MASK)) {
+
+                       /* Case (1) */
+
+                       pos_data[pos] = *cur_interval;
+
+               } else if (lcp > (intervals[*cur_interval] & LZ_LCPIT_LCP_MASK)) {
+
+                       /* Case (2) */
+
+                       intervals[next_interval] = lcp | 0x80000000;
+                       pos_data[prev_pos] = next_interval;
+                       pos_data[pos] = next_interval;
+                       *++cur_interval = next_interval++;
+
+               } else {
+
+                       /* Case (3) */
+
+                       u32 interval;
+                       u32 superinterval;
+
+                       for (;;) {
+                               /* Examine the deepest incomplete lcp-interval
+                                * and its superinterval.  */
+
+                               interval = *cur_interval;
+                               superinterval = *--cur_interval;
+
+                               if (lcp >= (intervals[superinterval] &
+                                           LZ_LCPIT_LCP_MASK))
+                                       break;
+
+                               /* The current suffix can't go in either of
+                                * them.  Therefore we're visiting 'interval'
+                                * for the last time and finalizing its
+                                * membership in 'superinterval'.  */
+
+                               intervals[interval] |=
+                                       (superinterval << LZ_LCPIT_LCP_BITS);
+                       }
+
+                       /* The current suffix can't go in 'interval', but it can
+                        * go in 'superinterval'.  */
+
+                       if (lcp > (intervals[superinterval] & LZ_LCPIT_LCP_MASK)) {
+                               /* Creating a new lcp-interval that is a
+                                * superinterval of 'interval' but a subinterval
+                                * of 'superinterval'.
+                                *
+                                * Example: with the LCP array:
+                                *
+                                *            2  2  2  4  4  3
+                                *
+                                * then we will execute this case when
+                                * processing the LCP value 3.  The LCP
+                                * intervals will be:
+                                *
+                                *            2  2  2  4  4  3
+                                * (lcp=0):  |                |
+                                * (lcp=2):  |                |
+                                * (lcp=3):        |          |
+                                * (lcp=4):        |       |
+                                *
+                                * Note that the 3-interval (the one being
+                                * created by this code) is a superinterval of
+                                * the 4-interval (which already existed)!  But
+                                * we don't need to re-assign pos_data values in
+                                * the 4-interval because they point to the
+                                * deepest interval which contains them, which
+                                * is the 4-interval.  */
+
+                               intervals[next_interval] = lcp | 0x80000000;
+                               intervals[interval] |=
+                                       (next_interval << LZ_LCPIT_LCP_BITS);
+                               pos_data[pos] = next_interval;
+                               *++cur_interval = next_interval++;
+                       } else {
+                               /* Finishing 'interval', continuing with
+                                * 'superinterval'.  */
+
+                               intervals[interval] |=
+                                       (superinterval << LZ_LCPIT_LCP_BITS);
+                               pos_data[pos] = superinterval;
+                       }
+               }
+
+               /* Remember this suffix index when processing the next-ranked
+                * suffix.  */
+               prev_pos = pos;
+       }
+
+       /* Finalize any still-incomplete lcp-intervals.  */
+       while (intervals[*cur_interval] & LZ_LCPIT_LCP_MASK) {
+               intervals[*cur_interval] |=
+                       (*(cur_interval - 1) << LZ_LCPIT_LCP_BITS);
+               cur_interval--;
+       }
+}
+
+static void
+lz_lcpit_set_default_params(struct lz_mf_params *params)
+{  /* Replace zero-valued parameters with defaults, then clamp nice_match_len. */
+       if (params->min_match_len == 0)  /* 0 is treated as "not set" */
+               params->min_match_len = 2;
+
+       if (params->max_match_len == 0)  /* default: up to the whole window */
+               params->max_match_len = params->max_window_size;
+
+       if (params->max_search_depth == 0)
+               params->max_search_depth = 32;
+
+       params->max_search_depth = DIV_ROUND_UP(params->max_search_depth, 8);  /* applied to caller-supplied values too, not only the default */
+
+       if (params->nice_match_len == 0)
+               params->nice_match_len = LZ_LCPIT_LCP_MAX;
+
+       if (params->nice_match_len < params->min_match_len)  /* raise to at least min_match_len */
+               params->nice_match_len = params->min_match_len;
+
+       if (params->nice_match_len > params->max_match_len)  /* then cap at max_match_len */
+               params->nice_match_len = params->max_match_len;
+
+       if (params->nice_match_len > LZ_LCPIT_LCP_MAX)  /* final cap; lz_lcpit_load_window() passes this value to build_LCPIT() */
+               params->nice_match_len = LZ_LCPIT_LCP_MAX;
+}
+
+static bool
+lz_lcpit_params_valid(const struct lz_mf_params *params)
+{  /* Only the window size is validated; no other field is inspected here. */
+       return params->max_window_size <= LZ_LCPIT_MAX_WINDOW_SIZE;
+}
+
+static u64
+lz_lcpit_get_needed_memory(u32 max_window_size)
+{  /* Size in bytes of the single buffer that lz_lcpit_init() allocates. */
+       return sizeof(u32) * (max_window_size +  /* first n-entry u32 array (the suffix array in load_window) */
+                             max(BUILD_SA_MIN_TMP_LEN,  /* presumably the scratch length build_SA() needs -- confirm in lz_suffix_array_utils.h */
+                                 2 * (u64)max_window_size));  /* two more n-entry arrays; the u64 cast keeps 2*n from wrapping in 32 bits */
+}
+
+static bool
+lz_lcpit_init(struct lz_mf *_mf)
+{  /* Normalize the parameters and allocate the working buffer; returns false if the allocation fails. */
+       struct lz_lcpit *mf = (struct lz_lcpit *)_mf;
+
+       lz_lcpit_set_default_params(&mf->base.params);
+
+       mf->SA = MALLOC(lz_lcpit_get_needed_memory(mf->base.params.max_window_size));  /* one allocation; lz_lcpit_load_window() carves it into three arrays */
+       if (!mf->SA)
+               return false;
+
+       return true;
+}
+
+static void
+lz_lcpit_load_window(struct lz_mf *_mf, const u8 T[], u32 n)
+{  /* Build the match-finding structures for the n-byte window T inside the preallocated buffer. */
+       struct lz_lcpit *mf = (struct lz_lcpit *)_mf;
+       u32 *mem = mf->SA;  /* base of the buffer from lz_lcpit_init(); treated below as three n-entry slices */
+
+       build_SA(&mem[0 * n], T, n, &mem[1 * n]);  /* slice 0 <- SA; last argument is presumably scratch space -- confirm against build_SA() */
+       build_ISA(&mem[2 * n], &mem[0 * n], n);  /* slice 2 <- ISA, computed from SA */
+       build_LCP(&mem[1 * n], &mem[0 * n], &mem[2 * n], T, n);  /* slice 1 <- LCP, computed from SA and ISA */
+       build_LCPIT(&mem[0 * n], &mem[1 * n], &mem[2 * n],  /* slices 1 and 2 are handed in as LCP and ISA ... */
+                   mf->base.params.nice_match_len, n);  /* ... nice_match_len is the value passed in the lcp-limit position */
+       mf->SA = &mem[0 * n];  /* same address as before, so lz_lcpit_destroy() still frees the buffer base */
+       mf->intervals = &mem[1 * n];  /* slice 1 is used as the lcp-interval array from here on */
+       mf->pos_data = &mem[2 * n];  /* slice 2 is used as the per-position data from here on */
+}
+
+static u32
+lz_lcpit_get_matches(struct lz_mf *_mf, struct lz_match matches[])
+{  /* Write the matches for the current position into matches[], advance one position, return the count. */
+       struct lz_lcpit *mf = (struct lz_lcpit *)_mf;
+       const u32 min_match_len = mf->base.params.min_match_len;
+       const u32 cur_pos = mf->base.cur_window_pos;
+       u32 * const pos_data = mf->pos_data;
+       u32 * const intervals = mf->intervals;
+       u32 num_matches = 0;
+       u32 lcp, next_lcp;
+       u32 interval, next_interval;
+       u32 cur_match, next_match;
+
+       /* Look up the deepest lcp-interval containing the current suffix.  */
+       interval = pos_data[cur_pos];
+
+       /* Since the current position is greater than any position previously
+        * searched, set the "lcp interval of the next match" for this suffix to
+        * 0.  This is the index of the root interval, and this indicates that
+        * there is no next match.  */
+       pos_data[cur_pos] = 0;
+
+       /* Ascend the lcp-interval tree until we reach an lcp-interval that has
+        * already been visited.  */
+
+       while (intervals[interval] & 0x80000000) {  /* top bit set == not yet visited (build_LCPIT sets it on creation) */
+
+               /* Visiting this lcp-interval for the first time.  Therefore,
+                * there are no Lempel-Ziv matches with length equal to the lcp
+                * of this lcp-interval.  */
+
+               /* Extract the LCP and superinterval reference.  */
+
+               lcp = intervals[interval] & LZ_LCPIT_LCP_MASK;
+
+               next_interval = (intervals[interval] & ~0x80000000)
+                                       >> LZ_LCPIT_LCP_BITS;
+
+               /* If the LCP is shorter than the minimum length of matches to
+                * be produced, we're done, since the LCP will only ever get
+                * shorter from here.  This also prevents ascending above the
+                * root of the lcp-interval tree, since the root is guaranteed
+                * to be a 0-interval, and min_match_len is guaranteed to be at
+                * least 2.  */
+               if (lcp < min_match_len)
+                       goto out;  /* no match has been recorded yet, so the post-processing below can be skipped */
+
+               /* Set the position of the most-recently-seen suffix within this
+                * lcp-interval.  Since this is the first visitation of this
+                * lcp-interval, this is simply the current suffix.
+                *
+                * Note that this overwrites the superinterval reference which
+                * was previously included in this lcp-interval data slot.
+                * Further visitations of this lcp-interval will detect that it
+                * is already visited and will follow the chain of
+                * most-recently-seen suffixes rather than ascend the tree
+                * directly.  */
+               intervals[interval] = (cur_pos << LZ_LCPIT_LCP_BITS) | lcp;
+
+               /* Ascend to the superinterval of this lcp-interval.  */
+               interval = next_interval;
+       }
+
+       /* We've already visited the current lcp-interval.  */
+
+       /* Extract the LCP of this lcp-interval.  */
+       lcp = intervals[interval] & LZ_LCPIT_LCP_MASK;
+
+       /* Extract the current match for this lcp-interval.  This usually is the
+        * most-recently-seen suffix within this lcp-interval, but it may be
+        * outdated.  */
+       cur_match = intervals[interval] >> LZ_LCPIT_LCP_BITS;
+
+       for (;;) {
+               /* If the LCP is shorter than the minimum length of matches to
+                * be produced, we're done, since the LCP will only ever get
+                * shorter from here.  This also prevents ascending above the
+                * root of the lcp-interval tree, since the root is guaranteed
+                * to be a 0-interval, and min_match_len is guaranteed to be at
+                * least 2.  */
+               if (lcp < min_match_len)
+                       break;
+
+               /* Advance the current match until the lcp of the *next* match
+                * is lower than the current lcp.  When this is true we know
+                * that the current match is up to date (lowest offset /
+                * greatest position for that lcp).  */
+
+               next_match = cur_match;
+               do {
+                       next_interval = pos_data[next_match];
+                       next_lcp = intervals[next_interval] & LZ_LCPIT_LCP_MASK;
+                       cur_match = next_match;
+                       next_match = intervals[next_interval] >> LZ_LCPIT_LCP_BITS;
+               } while (next_lcp >= lcp);
+
+               /* Link the current position into the match chain, discarding
+                * any skipped matches.  */
+               intervals[interval] = (cur_pos << LZ_LCPIT_LCP_BITS) | lcp;
+               pos_data[cur_match] = interval;
+
+               /* Record the match.  */
+               matches[num_matches++] = (struct lz_match) {
+                       .len = lcp,
+                       .offset = cur_pos - cur_match,
+               };
+
+               /* Bound the number of matches per position.  */
+               if (num_matches >= mf->base.params.max_search_depth)
+                       break;
+
+               /* Advance to the next match.  */
+               interval = next_interval;
+               lcp = next_lcp;
+               cur_match = next_match;
+       }
+
+       /* If the length of the longest match is equal to the lcp limit, it may
+        * have been truncated.  Try extending it up to the maximum match
+        * length.  */
+       if (num_matches && matches[0].len == mf->base.params.nice_match_len) {
+               const u8 * const strptr = lz_mf_get_window_ptr(&mf->base);
+               const u8 * const matchptr = strptr - matches[0].offset;
+               const u32 len_limit = min(lz_mf_get_bytes_remaining(&mf->base),
+                                         mf->base.params.max_match_len);
+               u32 len;
+
+               len = matches[0].len;
+               while (len < len_limit && strptr[len] == matchptr[len])
+                       len++;
+               matches[0].len = len;
+       }
+
+       for (u32 i = 0; i < num_matches / 2; i++)  /* reverse in place: matches were recorded with strictly decreasing lcp */
+               swap(matches[i], matches[num_matches - 1 - i]);
+out:
+       mf->base.cur_window_pos++;  /* every path advances the window position by exactly one */
+       return num_matches;
+}
+
+/* Slightly simplified version of lz_lcpit_get_matches() that only updates the
+ * data structures, for when no matches are needed at the current position.
+ * See lz_lcpit_get_matches() for explanatory comments.  */
+static void
+lz_lcpit_skip_position(struct lz_lcpit *mf)
+{
+       const u32 min_match_len = mf->base.params.min_match_len;
+       const u32 cur_pos = mf->base.cur_window_pos++;  /* advance up front, so the early return below needs no cleanup */
+       u32 * const pos_data = mf->pos_data;
+       u32 * const intervals = mf->intervals;
+       u32 lcp, next_lcp;
+       u32 interval, next_interval;
+       u32 cur_match, next_match;
+
+       interval = pos_data[cur_pos];  /* deepest lcp-interval containing the current suffix */
+       pos_data[cur_pos] = 0;  /* 0 == root interval == "no next match" */
+       while (intervals[interval] & 0x80000000) {  /* ascend through intervals not yet visited */
+               lcp = intervals[interval] & LZ_LCPIT_LCP_MASK;
+               next_interval = (intervals[interval] & ~0x80000000)
+                                       >> LZ_LCPIT_LCP_BITS;
+               if (lcp < min_match_len)
+                       return;
+               intervals[interval] = (cur_pos << LZ_LCPIT_LCP_BITS) | lcp;  /* mark visited; current suffix is the most recent one here */
+               interval = next_interval;
+       }
+       lcp = intervals[interval] & LZ_LCPIT_LCP_MASK;
+       cur_match = intervals[interval] >> LZ_LCPIT_LCP_BITS;
+       while (lcp >= min_match_len) {  /* same chain walk as get_matches(), without recording matches or a depth limit */
+               next_match = cur_match;
+               do {
+                       next_interval = pos_data[next_match];
+                       next_lcp = intervals[next_interval] & LZ_LCPIT_LCP_MASK;
+                       cur_match = next_match;
+                       next_match = intervals[next_interval] >> LZ_LCPIT_LCP_BITS;
+               } while (next_lcp >= lcp);
+               intervals[interval] = (cur_pos << LZ_LCPIT_LCP_BITS) | lcp;  /* link the current position into the chain */
+               pos_data[cur_match] = interval;
+               interval = next_interval;
+               lcp = next_lcp;
+               cur_match = next_match;
+       }
+}
+
+static void
+lz_lcpit_skip_positions(struct lz_mf *_mf, u32 n)
+{  /* Advance the match-finder by n positions without reporting matches. */
+       struct lz_lcpit *mf = (struct lz_lcpit *)_mf;
+
+       do {  /* NOTE: do/while, so at least one position is always skipped */
+               lz_lcpit_skip_position(mf);
+       } while (--n);  /* with n == 0 the u32 decrement would wrap; callers presumably pass n >= 1 -- confirm */
+}
+
+static void
+lz_lcpit_destroy(struct lz_mf *_mf)
+{  /* Release the single buffer allocated by lz_lcpit_init(). */
+       struct lz_lcpit *mf = (struct lz_lcpit *)_mf;
+
+       FREE(mf->SA);  /* SA is the buffer base; intervals and pos_data point into the same allocation */
+}
+
+const struct lz_mf_ops lz_lcp_interval_tree_ops = {  /* operations table exposing the static functions in this file */
+       .params_valid      = lz_lcpit_params_valid,
+       .get_needed_memory = lz_lcpit_get_needed_memory,
+       .init              = lz_lcpit_init,
+       .load_window       = lz_lcpit_load_window,
+       .get_matches       = lz_lcpit_get_matches,
+       .skip_positions    = lz_lcpit_skip_positions,
+       .destroy           = lz_lcpit_destroy,
+       .struct_size       = sizeof(struct lz_lcpit),  /* size of the full state struct, which the callbacks cast struct lz_mf * to */
+};
diff --git a/src/lz_linked_suffix_array.c b/src/lz_linked_suffix_array.c

new file mode 100644 (file)

index 0000000..1791844
--- /dev/null
+++ b/src/lz_linked_suffix_array.c
@@ -0,0 +1,722 @@
+/*
+ * lz_linked_suffix_array.c
+ *
+ * Linked suffix array match-finder for Lempel-Ziv compression.
+ *
+ * Copyright (c) 2013, 2014 Eric Biggers.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "wimlib/lz_mf.h"
+#include "wimlib/lz_suffix_array_utils.h"
+#include "wimlib/util.h"
+
+struct salink;  /* forward declaration; defined after struct lz_lsa, which holds a pointer to it */
+
+/* Length type --- must be an unsigned type large enough to hold the maximum
+ * match length.  */
+typedef u16 lz_lsa_len_t;
+
+/* Type of distances in suffix array links.  A larger type would allow skipping
+ * irrelevant suffixes more quickly, which is especially helpful towards the
+ * start of the window.  However, even a single byte allows skipping 255 at a
+ * time, which, where it matters, is already a big improvement over the
+ * alternative of searching the suffixes consecutively.  */
+typedef u8 lz_lsa_delta_t;
+
+#define LZ_LSA_LEN_MAX         ((lz_lsa_len_t)~0UL)  /* all bits set in lz_lsa_len_t */
+#define LZ_LSA_POS_MAX         ((u32)~0UL)  /* all bits set; init_salink() uses it as its "no such rank" sentinel */
+#define LZ_LSA_DELTA_MAX       ((lz_lsa_delta_t)~0UL)  /* all bits set in lz_lsa_delta_t; link distances are capped to this */
+
+/* State of the linked suffix array match-finder.  */
+struct lz_lsa {
+
+       struct lz_mf base;
+
+       /* Suffix array for the current window.
+        * This is a mapping from suffix rank to suffix position.  */
+       u32 *SA;
+
+       /* Inverse suffix array for the current window.
+        * This is a mapping from suffix position to suffix rank.
+        * If 0 <= r < window_size, then ISA[SA[r]] == r.  */
+       u32 *ISA;
+
+       /* Suffix array links.
+        *
+        * During a linear scan of the input string to find matches, this array
+        * is used to keep track of which rank suffixes in the suffix array appear
+        * before the current position.  Instead of searching in the original
+        * suffix array, scans for matches at a given position traverse a linked
+        * list containing (usually) only suffixes that appear before that
+        * position.  */
+       struct salink *salink;
+};
+
+/* Suffix array link.  An array of these structures, one per suffix rank, is
+ * used as a replacement for the raw LCP (Longest Common Prefix) array to allow
+ * skipping over suffixes that appear later in the window and hence cannot be
+ * used as LZ77 matches.  */
+struct salink {
+       union {
+               /* Temporary fields used while this structure is being
+                * initialized.
+                *
+                * Note: we want the entire `struct salink' to be only 6 bytes,
+                * even though this makes "next_initial" unaligned.  */
+               struct {
+                       u32 next_initial;
+                       lz_lsa_len_t lcpnext_initial;
+               } _packed_attribute;
+
+               struct {
+                       /* Initially, the length, in bytes, of the longest common
+                        * prefix (LCP) between the suffix having this rank and
+                        * the suffix with the smallest larger rank that
+                        * starts earlier in the window than the suffix having
+                        * this rank.  If no such suffix exists, this will be 0.
+                        *
+                        * Later, during match-finding, after the corresponding
+                        * suffix has entered the LZ77 dictionary, this value
+                        * may be updated by lz_lsa_update_salink() to refer
+                        * instead to a lexicographically closer (but still
+                        * larger) suffix that begins at a later position that
+                        * has entered the LZ77 dictionary.  */
+                       lz_lsa_len_t   lcpnext;
+
+                       /* Initially, the length, in bytes, of the longest
+                        * common prefix (LCP) between the suffix having this
+                        * rank and the suffix with the largest smaller rank
+                        * that starts earlier in the window than the suffix
+                        * having this rank.  If no such suffix exists, this
+                        * will be 0.
+                        *
+                        * Later, during match-finding, after the corresponding
+                        * suffix has entered the LZ77 dictionary, this value
+                        * may be updated by lz_lsa_update_salink() to refer
+                        * instead to a lexicographically closer (but still
+                        * smaller) suffix that begins at a later position that
+                        * has entered the LZ77 dictionary.  */
+                       lz_lsa_len_t   lcpprev;
+
+                       /* Distance to the suffix referred to in the description
+                        * of "lcpnext" above, but capped to a maximum value to
+                        * save memory; or, 0 if no such suffix exists.  If the
+                        * true distance was truncated, this will give the
+                        * distance to the rank of a suffix that is
+                        * lexicographically closer to the current suffix than
+                        * the desired suffix, but appears *later* in the window
+                        * and hence cannot be used as the basis for an LZ77
+                        * match.  */
+                       lz_lsa_delta_t dist_to_next;
+
+                       /* Distance to the suffix referred to in the description
+                        * of "lcpprev" above, but capped to a maximum value to
+                        * save memory; or, 0 if no such suffix exists.  If the
+                        * true distance was truncated, this will give the
+                        * distance to the rank of a suffix that is
+                        * lexicographically closer to the current suffix than
+                        * the desired suffix, but appears *later* in the window
+                        * and hence cannot be used as the basis for an LZ77
+                        * match.  */
+                       lz_lsa_delta_t dist_to_prev;
+               };
+       };
+};
+
+/* Initialize the SA link array in linear time.
+ *
+ * This is similar to computing the LPF (Longest Previous Factor) array, which
+ * is addressed in several papers.  In particular the algorithms below are based
+ * on Crochemore et al. 2009: "LPF computation revisited".  However, this
+ * match-finder does not actually compute or use the LPF array per se.  Rather,
+ * this function sets up some information necessary to compute the LPF array,
+ * but later lz_lsa_get_matches() actually uses this information to search
+ * the suffix array directly and can keep searching beyond the first (longest)
+ * match whose length would be placed in the LPF array.  This difference from
+ * the theoretical work is necessary because in many real compression formats
+ * matches take variable numbers of bits to encode, so a decent parser needs to
+ * consider more than just the longest match with unspecified offset.
+ *
+ * Note: We cap the lcpprev and lcpnext values to the maximum match length so
+ * that the match-finder need not worry about it later, in the inner loop.
+ *
+ * Note: the LCP array is one of the inputs to this function, but it is used as
+ * temporary space and therefore will be invalidated.
+ */
+static void
+init_salink(struct salink link[restrict], u32 LCP[restrict],
+           const u32 SA[restrict], const u8 T[restrict], u32 n,  /* T is not referenced in this function body */
+           lz_lsa_len_t min_match_len, lz_lsa_len_t max_match_len)
+{
+       /* Calculate salink.dist_to_next and salink.lcpnext.
+        *
+        * Pass 1 calculates, for each suffix rank, the corresponding
+        * "next_initial" value which is the smallest larger rank that
+        * corresponds to a suffix starting earlier in the string.  It also
+        * calculates "lcpnext_initial", which is the longest common prefix with
+        * that suffix, although to eliminate checks in lz_lsa_get_matches(),
+        * "lcpnext_initial" is set to 0 if it's less than the minimum match
+        * length or set to the maximum match length if it's greater than the
+        * maximum match length.
+        *
+        * Pass 2 translates each absolute "next_initial", a 4-byte value, into
+        * a relative "dist_to_next", a 1-byte value.  This is done to save
+        * memory.  In the case that the exact relative distance cannot be
+        * encoded in 1 byte, it is capped to 255.  This is valid as long as
+        * lz_lsa_get_matches() validates each position before using it.
+        * Note that "lcpnext" need not be updated in this case because it will
+        * not be used until the actual next rank has been found anyway.
+        */
+       link[n - 1].next_initial = LZ_LSA_POS_MAX;  /* the highest rank has no larger rank; indexing n - 1 requires n >= 1 */
+       link[n - 1].lcpnext_initial = 0;
+       for (u32 r = n - 2; r != LZ_LSA_POS_MAX; r--) {  /* counts down through 0; ends when the u32 wraps to LZ_LSA_POS_MAX */
+               u32 t = r + 1;
+               u32 l = LCP[t];
+               while (t != LZ_LSA_POS_MAX && SA[t] > SA[r]) {  /* follow already-computed links past suffixes that start later */
+                       l = min(l, link[t].lcpnext_initial);
+                       t = link[t].next_initial;
+               }
+               link[r].next_initial = t;
+
+               if (l < min_match_len)
+                       l = 0;
+               else if (l > max_match_len)
+                       l = max_match_len;
+               link[r].lcpnext_initial = l;
+       }
+       for (u32 r = 0; r < n; r++) {
+               u32 next;
+               lz_lsa_len_t l;
+               lz_lsa_delta_t dist_to_next;
+
+               next = link[r].next_initial;  /* read both temporaries first: they share storage with the final fields */
+               l = link[r].lcpnext_initial;
+
+               if (next == LZ_LSA_POS_MAX)
+                       dist_to_next = 0;
+               else if (next - r <= LZ_LSA_DELTA_MAX)
+                       dist_to_next = next - r;
+               else
+                       dist_to_next = LZ_LSA_DELTA_MAX;
+
+               link[r].lcpnext = l;
+               link[r].dist_to_next = dist_to_next;
+       }
+
+       /* Calculate salink.dist_to_prev and salink.lcpprev.
+        *
+        * This is analogous to dist_to_next and lcpnext as described above, but
+        * in the other direction.  That is, here we're interested in, for each
+        * rank, the largest smaller rank that corresponds to a suffix starting
+        * earlier in the string.
+        *
+        * To save memory we don't have a "prev_initial" field, but rather store
+        * those values in the LCP array.  */
+       LCP[0] = LZ_LSA_POS_MAX;
+       link[0].lcpprev = 0;
+       for (u32 r = 1; r < n; r++) {
+               u32 t = r - 1;
+               u32 l = LCP[r];  /* read before LCP[r] is overwritten with the "prev" rank below */
+               while (t != LZ_LSA_POS_MAX && SA[t] > SA[r]) {
+                       l = min(l, link[t].lcpprev);
+                       t = LCP[t];  /* for ranks below r, LCP[] already holds the "prev" rank */
+               }
+               LCP[r] = t;
+
+               if (l < min_match_len)
+                       l = 0;
+               else if (l > max_match_len)
+                       l = max_match_len;
+
+               link[r].lcpprev = l;
+       }
+       for (u32 r = 0; r < n; r++) {
+
+               u32 prev = LCP[r];
+
+               if (prev == LZ_LSA_POS_MAX)
+                       link[r].dist_to_prev = 0;
+               else if (r - prev <= LZ_LSA_DELTA_MAX)
+                       link[r].dist_to_prev = r - prev;
+               else
+                       link[r].dist_to_prev = LZ_LSA_DELTA_MAX;
+       }
+}
+
+/* If ENABLE_LZ_DEBUG is defined, verify the values computed by init_salink().
+ *
+ * WARNING: this is for debug use only as it does not necessarily run in linear
+ * time!!!  */
+static void
+verify_salink(const struct salink link[], const u32 SA[], const u8 T[], u32 n,
+             lz_lsa_len_t min_match_len, lz_lsa_len_t max_match_len)
+{  /* The body compiles to nothing unless ENABLE_LZ_DEBUG is defined. */
+#ifdef ENABLE_LZ_DEBUG
+       for (u32 r = 0; r < n; r++) {
+               for (u32 prev = r; ; ) {  /* scan downward for the nearest smaller rank whose suffix starts earlier */
+                       if (prev == 0) {  /* ran off the low end: no such suffix exists */
+                               LZ_ASSERT(link[r].dist_to_prev == 0);
+                               LZ_ASSERT(link[r].lcpprev == 0);
+                               break;
+                       }
+
+                       prev--;
+
+                       if (SA[prev] < SA[r]) {
+                               LZ_ASSERT(link[r].dist_to_prev == min(r - prev, LZ_LSA_DELTA_MAX));
+
+                               u32 lcpprev;
+                               for (lcpprev = 0;  /* recompute the common prefix length by direct byte comparison */
+                                    lcpprev < min(n - SA[prev], n - SA[r]) &&
+                                            T[SA[prev] + lcpprev] == T[SA[r] + lcpprev];
+                                    lcpprev++)
+                                       ;
+                               if (lcpprev < min_match_len)  /* same zeroing and capping that init_salink() applies */
+                                       lcpprev = 0;
+                               else if (lcpprev > max_match_len)
+                                       lcpprev = max_match_len;
+
+                               LZ_ASSERT(lcpprev == link[r].lcpprev);
+                               break;
+                       }
+               }
+
+               for (u32 next = r; ; ) {  /* mirror image: scan upward for the nearest larger rank */
+                       if (next == n - 1) {  /* ran off the high end: no such suffix exists */
+                               LZ_ASSERT(link[r].dist_to_next == 0);
+                               LZ_ASSERT(link[r].lcpnext == 0);
+                               break;
+                       }
+
+                       next++;
+
+                       if (SA[next] < SA[r]) {
+                               LZ_ASSERT(link[r].dist_to_next == min(next - r, LZ_LSA_DELTA_MAX));
+
+                               u32 lcpnext;
+                               for (lcpnext = 0;
+                                    lcpnext < min(n - SA[next], n - SA[r]) &&
+                                            T[SA[next] + lcpnext] == T[SA[r] + lcpnext];
+                                    lcpnext++)
+                                       ;
+                               if (lcpnext < min_match_len)
+                                       lcpnext = 0;
+                               else if (lcpnext > max_match_len)
+                                       lcpnext = max_match_len;
+
+                               LZ_ASSERT(lcpnext == link[r].lcpnext);
+                               break;
+                       }
+               }
+       }
+#endif  /* ENABLE_LZ_DEBUG */
+}
+
+/*
+ * Prepare the suffix links for a search at the suffix of rank @r.
+ *
+ * The entry link[r] names a neighbor on each side (by distance) together with
+ * the match length shared with it.  For each such neighbor, if @r is nearer to
+ * it than the suffix it currently links back to, the neighbor's link is
+ * redirected to @r and takes over the corresponding match length.  A distance
+ * of 0 means "no neighbor on this side" and is left alone.
+ */
+static inline void
+lz_lsa_update_salink(const u32 r, struct salink link[])
+{
+       struct salink * const cur = &link[r];
+       const u32 next = r + cur->dist_to_next;
+       const u32 prev = r - cur->dist_to_prev;
+
+       /* Right-hand neighbor: pull its back-link closer if @r is nearer.  */
+       if (next != r) {
+               struct salink * const succ = &link[next];
+
+               if (cur->dist_to_next < succ->dist_to_prev) {
+                       succ->dist_to_prev = cur->dist_to_next;
+                       succ->lcpprev = cur->lcpnext;
+               }
+       }
+
+       /* Left-hand neighbor: same adjustment in the other direction.  */
+       if (prev != r) {
+               struct salink * const pred = &link[prev];
+
+               if (cur->dist_to_prev < pred->dist_to_next) {
+                       pred->dist_to_next = cur->dist_to_prev;
+                       pred->lcpnext = cur->lcpprev;
+               }
+       }
+}
+
+/*
+ * Fill in defaults for any parameters the caller left as 0, and clamp or
+ * rescale them to what this match-finder uses:
+ *
+ *   min_match_len     defaults to 2
+ *   max_match_len     defaults to max_window_size, capped at LZ_LSA_LEN_MAX
+ *   max_search_depth  defaults to 32, then divided by 5 (rounding up)
+ */
+static void
+lz_lsa_set_default_params(struct lz_mf_params *params)
+{
+       if (!params->min_match_len)
+               params->min_match_len = 2;
+
+       if (!params->max_search_depth)
+               params->max_search_depth = 32;
+
+       /* This algorithm reports the longest matches first, so a smaller
+        * search depth suffices.  */
+       params->max_search_depth = DIV_ROUND_UP(params->max_search_depth, 5);
+
+       if (!params->max_match_len)
+               params->max_match_len = params->max_window_size;
+
+       if (params->max_match_len > LZ_LSA_LEN_MAX)
+               params->max_match_len = LZ_LSA_LEN_MAX;
+}
+
+/*
+ * Return the number of bytes lz_lsa_init() allocates for a window of up to
+ * @max_window_size bytes.  The arithmetic is done in 64 bits so the result
+ * cannot wrap.
+ */
+static u64
+lz_lsa_get_needed_memory(u32 max_window_size)
+{
+       const u64 n = max_window_size;
+
+       /* SA and ISA: one u32 per position each.  */
+       const u64 sa_and_isa = 2 * n * sizeof(u32);
+
+       /* The salink array doubles as the temporary space for divsufsort, so
+        * it must be at least as large as that minimum.  */
+       const u64 links = max(BUILD_SA_MIN_TMP_LEN * sizeof(u32),
+                             n * sizeof(struct salink));
+
+       return sa_and_isa + links;
+}
+
+/* Algorithm-specific parameter check.  This match-finder adds no restrictions
+ * of its own, so every parameter set is accepted.  */
+static bool
+lz_lsa_params_valid(const struct lz_mf_params *params)
+{
+       (void)params;   /* intentionally unused */
+       return true;
+}
+
+/*
+ * Algorithm-specific initialization: fill in default parameters and allocate
+ * the arrays sized by the maximum window size.
+ *
+ * Returns true on success.  On allocation failure, returns false with nothing
+ * left allocated.
+ */
+static bool
+lz_lsa_init(struct lz_mf *_mf)
+{
+       struct lz_lsa *mf = (struct lz_lsa *)_mf;
+
+       /* Widen to size_t before computing any allocation size.  With a u32
+        * operand, 'max_window_size * 2' would be evaluated in 32-bit
+        * arithmetic and wrap for windows of 2^31 bytes or more, producing an
+        * undersized SA/ISA allocation.  */
+       const size_t max_window_size = mf->base.params.max_window_size;
+
+       lz_lsa_set_default_params(&mf->base.params);
+
+       /* SA and ISA will share the same allocation.  */
+       mf->SA = MALLOC(max_window_size * 2 * sizeof(u32));
+       if (!mf->SA)
+               return false;
+
+       /* The salink array is also used as temporary space when building the
+        * suffix array, so it must meet that minimum size too.  */
+       mf->salink = MALLOC(max(BUILD_SA_MIN_TMP_LEN * sizeof(u32),
+                               max_window_size * sizeof(struct salink)));
+       if (!mf->salink) {
+               FREE(mf->SA);
+               mf->SA = NULL;  /* don't leave a dangling pointer behind */
+               return false;
+       }
+
+       return true;
+}
+
+/*
+ * Build all per-window data structures for the window T[0..n-1]: the suffix
+ * array, the suffix links, and the inverse suffix array.
+ *
+ * To save memory, no separate LCP array is allocated.  A temporary copy of the
+ * ISA is built in the (not yet initialized) salink storage, the LCP array is
+ * built in the upper half of the SA allocation (the slot that will finally
+ * hold the ISA), and once the links have been derived from the LCP array the
+ * ISA is simply recomputed in that slot.  Recomputing the ISA from the SA is
+ * cheap, and this match-finder is memory-hungry enough that the saving is
+ * worthwhile.
+ */
+static void
+lz_lsa_load_window(struct lz_mf *_mf, const u8 T[], u32 n)
+{
+       struct lz_lsa *mf = (struct lz_lsa *)_mf;
+       u32 * const SA = mf->SA;
+       u32 * const upper = SA + n;             /* LCP first, ISA at the end */
+       u32 * const scratch = (u32 *)mf->salink; /* tmp space / preliminary ISA */
+
+       /* The preliminary ISA must fit inside the salink storage.  */
+       BUILD_BUG_ON(sizeof(mf->salink[0]) < sizeof(mf->ISA[0]));
+
+       /* Suffix array, using the salink storage as temporary space.  */
+       build_SA(SA, T, n, scratch);
+
+       /* Preliminary ISA, needed only to compute the LCP array.  */
+       build_ISA(scratch, SA, n);
+
+       /* LCP (Longest Common Prefix) array.  */
+       build_LCP(upper, SA, scratch, T, n);
+
+       /* Suffix array links; this overwrites the preliminary ISA.  */
+       init_salink(mf->salink, upper, SA, T, n,
+                   mf->base.params.min_match_len,
+                   mf->base.params.max_match_len);
+       verify_salink(mf->salink, SA, T, n,
+                     mf->base.params.min_match_len,
+                     mf->base.params.max_match_len);
+
+       /* Final ISA, replacing the no-longer-needed LCP array.  */
+       build_ISA(upper, SA, n);
+       mf->ISA = upper;
+}
+
+/*
+ * Retrieve the matches at the current window position, then advance the
+ * position by one.
+ *
+ * Starting from the rank of the current suffix, the linked suffix array is
+ * walked outward in both directions, keeping the two sides in step so that
+ * matches are discovered from longest to shortest.  For each distinct length
+ * only the smallest offset is kept, and only if it is smaller than the offset
+ * of every longer match already found.  At most @max_search_depth suffixes are
+ * examined.
+ *
+ * The matches are stored in @matches[] in order of increasing length.  The
+ * return value is the number of matches stored, possibly 0.
+ */
+static u32
+lz_lsa_get_matches(struct lz_mf *_mf, struct lz_match matches[])
+{
+       struct lz_lsa *mf = (struct lz_lsa *)_mf;
+       const u32 i = mf->base.cur_window_pos++;
+
+       const u32 * const restrict SA = mf->SA;
+       const u32 * const restrict ISA = mf->ISA;
+       struct salink * const restrict link = mf->salink;
+
+       /* r = Rank of the suffix at the current position.  */
+       const u32 r = ISA[i];
+
+       /* Prepare for searching the current position.  */
+       lz_lsa_update_salink(r, link);
+
+       /* Prefetch next position in SA and link.
+        *
+        * This can improve performance on large windows since the locations in
+        * SA and link at which each successive search begins are in general
+        * randomly distributed.  */
+       if (likely(i + 1 < mf->base.cur_window_size)) {
+               const u32 next_r = ISA[i + 1];
+               prefetch(&SA[next_r]);
+               prefetch(&link[next_r]);
+       }
+
+       /* L = rank of next suffix to the left;
+        * R = rank of next suffix to the right;
+        * lenL = length of match between current position and the suffix with rank L;
+        * lenR = length of match between current position and the suffix with rank R.
+        *
+        * This is left and right relative to the rank of the current suffix.
+        * Since the suffixes in the suffix array are sorted, the longest
+        * matches are immediately to the left and right (using the linked list
+        * to ignore all suffixes that occur later in the window).  The match
+        * length decreases the farther left and right we go.  We shall keep the
+        * length on both sides in sync in order to choose the lowest-cost match
+        * of each length.
+        */
+       u32 L = r - link[r].dist_to_prev;
+       u32 R = r + link[r].dist_to_next;
+       u32 lenL = link[r].lcpprev;
+       u32 lenR = link[r].lcpnext;
+
+       /* num_matches = number of matches found so far.  */
+       u32 num_matches = 0;
+
+       /* best_offset = offset of lowest-cost match found so far.
+        *
+        * Shorter matches that do not have a lower offset than this are
+        * discarded, since presumably it would be cheaper to output the bytes
+        * from the longer match instead.  */
+       u32 best_offset = LZ_LSA_POS_MAX;
+
+       /* count_remaining = maximum number of possible matches remaining to be
+        * considered.  */
+       u32 count_remaining = mf->base.params.max_search_depth;
+
+       /* pending_offset = offset of lowest-cost match found for the current
+        * length, or 0 if none found yet.  */
+       u32 pending_offset = 0;
+
+       /* Note: some 'goto' statements are used in the remainder of this
+        * function to remove unnecessary checks and create branches that the
+        * CPU may predict better.  (This function is performance critical.)  */
+
+       if (lenL != 0 && lenL >= lenR)
+               goto extend_left;
+       else if (lenR != 0)
+               goto extend_right;
+       else
+               return 0;
+
+extend_left:
+       /* Search suffixes on the left until the match length has decreased
+        * below the next match length on the right or to below the minimum
+        * match length.  */
+       for (;;) {
+               u32 offset;
+               u32 old_L;
+               u32 old_lenL;
+
+               /* Check for hard cutoff on amount of work done.  */
+               if (count_remaining-- == 0) {
+                       if (pending_offset != 0) {
+                               /* Save pending match.  */
+                               matches[num_matches++] = (struct lz_match) {
+                                       .len = lenL,
+                                       .offset = pending_offset,
+                               };
+                       }
+                       goto out;
+               }
+
+               if (SA[L] < i) {
+                       /* Suffix is in LZ77 dictionary.  (Check was needed
+                        * because the salink array caps distances to save
+                        * memory.)  */
+
+                       offset = i - SA[L];
+
+                       /* Save match offset if it results in lower cost.  */
+                       if (offset < best_offset) {
+                               best_offset = offset;
+                               pending_offset = offset;
+                       }
+               }
+
+               /* Advance left to previous suffix.  */
+
+               old_L = L;
+               old_lenL = lenL;
+
+               L -= link[L].dist_to_prev;
+
+               if (link[old_L].lcpprev < old_lenL) {
+                       /* Match length decreased.  */
+
+                       lenL = link[old_L].lcpprev;
+
+                       if (old_lenL > lenR) {
+                               /* Neither the right side nor the left side has
+                                * any more matches of length @old_lenL.  If a
+                                * pending match exists, save it.  */
+                               if (pending_offset != 0) {
+                                       matches[num_matches++] = (struct lz_match) {
+                                               .len = old_lenL,
+                                               .offset = pending_offset,
+                                       };
+                                       pending_offset = 0;
+                               }
+
+                               if (lenL >= lenR) {
+                                       /* New match length on left is still at
+                                        * least as large as the next match
+                                        * length on the right:  Keep extending
+                                        * left, unless the minimum match length
+                                        * would be underrun.  */
+                                       if (lenL == 0)
+                                               goto out;
+                                       goto extend_left;
+                               }
+                       }
+
+                       /* Here we have lenL < lenR, or the right side still
+                        * has matches of length @old_lenL.  Extend right.
+                        * (No check for whether the minimum match length has
+                        * been underrun is needed, provided that such lengths
+                        * are marked as 0.)  */
+                       goto extend_right;
+               }
+       }
+
+extend_right:
+       /* Search suffixes on the right until the match length has decreased to
+        * the next match length on the left or to below the minimum match
+        * length.  */
+       for (;;) {
+               u32 offset;
+               u32 old_R;
+               u32 old_lenR;
+
+               /* Check for hard cutoff on amount of work done.  */
+               if (count_remaining-- == 0) {
+                       if (pending_offset != 0) {
+                               /* Save pending match.  */
+                               matches[num_matches++] = (struct lz_match) {
+                                       .len = lenR,
+                                       .offset = pending_offset,
+                               };
+                       }
+                       goto out;
+               }
+
+               if (SA[R] < i) {
+                       /* Suffix is in LZ77 dictionary.  (Check was needed
+                        * because the salink array caps distances to save
+                        * memory.)  */
+
+                       offset = i - SA[R];
+
+                       /* Save match offset if it results in lower cost.  */
+                       if (offset < best_offset) {
+                               best_offset = offset;
+                               pending_offset = offset;
+                       }
+               }
+
+               /* Advance right to next suffix.  */
+
+               old_R = R;
+               old_lenR = lenR;
+
+               R += link[R].dist_to_next;
+
+               if (link[old_R].lcpnext < old_lenR) {
+                       /* Match length decreased.  */
+
+                       lenR = link[old_R].lcpnext;
+
+                       /* Neither the right side nor the left side has any more
+                        * matches of length @old_lenR.  If a pending match
+                        * exists, save it.  */
+                       if (pending_offset != 0) {
+                               matches[num_matches++] = (struct lz_match) {
+                                       .len = old_lenR,
+                                       .offset = pending_offset,
+                               };
+                               pending_offset = 0;
+                       }
+
+                       if (lenL >= lenR) {
+                               /* lenL >= lenR:  Extend left, unless the
+                                * minimum match length would be underrun, in
+                                * which case we are done.  */
+                               if (lenL == 0)
+                                       goto out;
+
+                               goto extend_left;
+                       }
+                       /* lenR > lenL:  Keep extending right.
+                        * (No check for whether the minimum match length has
+                        * been underrun is needed, provided that such lengths
+                        * are marked as 0.)  */
+               }
+       }
+
+out:
+       /* Matches were collected longest-first; reverse them in place so the
+        * caller receives them in order of increasing length.  */
+       for (u32 j = 0; j < num_matches / 2; j++)
+               swap(matches[j], matches[num_matches - 1 - j]);
+       return num_matches;
+}
+
+/*
+ * Advance the match-finder by @n positions without reporting matches.  Only
+ * the suffix links are updated for each skipped position.
+ *
+ * @n must be nonzero: the loop body runs before the count is tested.
+ */
+static void
+lz_lsa_skip_positions(struct lz_mf *_mf, u32 n)
+{
+       struct lz_lsa *mf = (struct lz_lsa *)_mf;
+       u32 remaining = n;
+
+       do {
+               const u32 r = mf->ISA[mf->base.cur_window_pos++];
+
+               lz_lsa_update_salink(r, mf->salink);
+       } while (--remaining != 0);
+}
+
+/* Release the two allocations made by lz_lsa_init().  The ISA lives inside
+ * the SA allocation, so it needs no separate free.  */
+static void
+lz_lsa_destroy(struct lz_mf *_mf)
+{
+       struct lz_lsa * const mf = (struct lz_lsa *)_mf;
+
+       FREE(mf->salink);
+       FREE(mf->SA);
+}
+
+const struct lz_mf_ops lz_linked_suffix_array_ops = { /* referenced from mf_ops[] in lz_mf.c */
+       .params_valid      = lz_lsa_params_valid,
+       .get_needed_memory = lz_lsa_get_needed_memory,
+       .init              = lz_lsa_init,
+       .load_window       = lz_lsa_load_window,
+       .get_matches       = lz_lsa_get_matches,
+       .skip_positions    = lz_lsa_skip_positions,
+       .destroy           = lz_lsa_destroy,
+       .struct_size       = sizeof(struct lz_lsa), /* lz_mf_alloc() allocates this many bytes */
+};
diff --git a/src/lz_mf.c b/src/lz_mf.c

new file mode 100644 (file)

index 0000000..bb95427
--- /dev/null
+++ b/src/lz_mf.c
@@ -0,0 +1,348 @@
+/*
+ * lz_mf.c
+ *
+ * Interface for Lempel-Ziv match-finders.
+ *
+ * Copyright (c) 2014 Eric Biggers.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "wimlib/lz_mf.h"
+#include "wimlib/lz_mf_ops.h"
+#include "wimlib/util.h"
+
+/* Available match-finding algorithms, indexed by 'enum lz_mf_algo' value.
+ *
+ * The table is only ever read, so the pointers themselves are declared const
+ * as well as the structures they point to.  */
+static const struct lz_mf_ops * const mf_ops[] = {
+       [LZ_MF_NULL]                    = &lz_null_ops,
+       [LZ_MF_BRUTE_FORCE]             = &lz_brute_force_ops,
+       [LZ_MF_HASH_CHAINS]             = &lz_hash_chains_ops,
+       [LZ_MF_BINARY_TREES]            = &lz_binary_trees_ops,
+       [LZ_MF_LCP_INTERVAL_TREE]       = &lz_lcp_interval_tree_ops,
+       [LZ_MF_LINKED_SUFFIX_ARRAY]     = &lz_linked_suffix_array_ops,
+};
+
+/*
+ * Map a match-finding algorithm identifier to its operations structure.
+ *
+ * If @algorithm is LZ_MF_DEFAULT, an algorithm is chosen automatically based
+ * on @max_window_size:
+ *
+ *     up to 32768 bytes       hash chains
+ *     up to 2097152 bytes     binary trees
+ *     up to 33554432 bytes    LCP interval tree
+ *     anything larger         linked suffix array
+ *
+ * Returns NULL if the resulting identifier has no entry in mf_ops[].
+ */
+static const struct lz_mf_ops *
+select_mf_ops(enum lz_mf_algo algorithm, u32 max_window_size)
+{
+       int idx;
+
+       if (algorithm != LZ_MF_DEFAULT)
+               idx = (int)algorithm;
+       else if (max_window_size <= 32768)
+               idx = (int)LZ_MF_HASH_CHAINS;
+       else if (max_window_size <= 2097152)
+               idx = (int)LZ_MF_BINARY_TREES;
+       else if (max_window_size <= 33554432)
+               idx = (int)LZ_MF_LCP_INTERVAL_TREE;
+       else
+               idx = (int)LZ_MF_LINKED_SUFFIX_ARRAY;
+
+       if (idx < 0 || idx >= ARRAY_LEN(mf_ops))
+               return NULL;
+
+       return mf_ops[idx];
+}
+
+/*
+ * Compute an upper bound on the memory, in bytes, consumed by a match-finder
+ * allocated with the given algorithm and maximum window size.
+ *
+ * The window itself is not included; callers that need it must add it
+ * separately.
+ *
+ * Returns 0 if @algorithm is invalid.
+ */
+u64
+lz_mf_get_needed_memory(enum lz_mf_algo algorithm, u32 max_window_size)
+{
+       const struct lz_mf_ops * const ops =
+               select_mf_ops(algorithm, max_window_size);
+
+       /* Size of the match-finder structure plus whatever the algorithm
+        * allocates on top of it.  */
+       return ops ? ops->struct_size + ops->get_needed_memory(max_window_size)
+                  : 0;
+}
+/*
+ * Test whether @params can be used to create a match-finder with
+ * lz_mf_alloc().  Returns %true if so, %false otherwise.
+ */
+bool
+lz_mf_params_valid(const struct lz_mf_params *params)
+{
+       /* The algorithm must be a valid one, or LZ_MF_DEFAULT.  */
+       const struct lz_mf_ops * const ops =
+               select_mf_ops(params->algorithm, params->max_window_size);
+
+       if (!ops)
+               return false;
+
+       /* Empty windows are rejected so that match-finding algorithms need no
+        * special-case code for them.  */
+       if (params->max_window_size == 0)
+               return false;
+
+       /* Length-1 matches are rejected, both as a minimum and as a maximum,
+        * so that match-finding algorithms need not handle them.  Most
+        * LZ-based formats don't allow such matches since they are rarely
+        * helpful, and a compressor that does want them can keep its own
+        * 256-entry table of the most recent position of each byte value.
+        *
+        * A value of 0 for either length is fine: it asks the algorithm to
+        * fill in a default.  */
+       if (params->min_match_len == 1 || params->max_match_len == 1)
+               return false;
+
+       /* An explicit maximum may not be shorter than the minimum.  */
+       if (params->max_match_len != 0 &&
+           params->max_match_len < params->min_match_len)
+               return false;
+
+       /* The needed memory size must be representable in a 'size_t'.  */
+       if (sizeof(size_t) < sizeof(u64)) {
+               const u64 needed_mem =
+                       ops->get_needed_memory(params->max_window_size);
+
+               if (needed_mem != (size_t)needed_mem)
+                       return false;
+       }
+
+       /* The algorithm-specific routine has the final say.  */
+       return ops->params_valid(params);
+}
+
+/*
+ * Create a match-finder.
+ *
+ * @params
+ *     Parameters for the new match-finder; see the declaration of 'struct
+ *     lz_mf_params'.
+ *
+ * Returns the new match-finder, or NULL if the parameters are invalid or
+ * memory could not be allocated.  Use lz_mf_params_valid() beforehand to tell
+ * the two failure cases apart.
+ */
+struct lz_mf *
+lz_mf_alloc(const struct lz_mf_params *params)
+{
+       const struct lz_mf_ops *ops;
+       struct lz_mf *mf;
+
+       if (!lz_mf_params_valid(params))
+               return NULL;
+
+       /* Validation succeeded, so an operations structure must exist, and it
+        * must describe a structure that embeds 'struct lz_mf'.  */
+       ops = select_mf_ops(params->algorithm, params->max_window_size);
+       LZ_ASSERT(ops != NULL);
+       LZ_ASSERT(ops->struct_size >= sizeof(struct lz_mf));
+
+       /* Zero-initialized algorithm-specific structure.  */
+       mf = CALLOC(1, ops->struct_size);
+       if (mf == NULL)
+               return NULL;
+
+       /* The match-finder keeps its own copies of both structures.  */
+       mf->ops = *ops;
+       mf->params = *params;
+
+       /* Algorithm-specific setup; this is normally where most of the memory
+        * gets allocated.  */
+       if (mf->ops.init(mf)) {
+               /* init() must have replaced any 0 match length with a real
+                * value.  */
+               LZ_ASSERT(mf->params.min_match_len >= 2);
+               LZ_ASSERT(mf->params.max_match_len >= mf->params.min_match_len);
+               return mf;
+       }
+
+       FREE(mf);
+       return NULL;
+}
+
+/*
+ * Give the match-finder a new window to search.
+ *
+ * @mf
+ *     The match-finder that will receive the window.
+ * @window
+ *     The window data.  It is referenced, not copied, so it must stay
+ *     available and unmodified for as long as the match-finder is used on it.
+ * @size
+ *     Number of bytes in the window.  Must be nonzero and no larger than the
+ *     @max_window_size the match-finder was allocated with.
+ *
+ * Note: this interface does not support sliding windows!
+ */
+void
+lz_mf_load_window(struct lz_mf *mf, const u8 *window, u32 size)
+{
+       /* Reject empty and oversized windows (debug builds only).  */
+       LZ_ASSERT(size > 0);
+       LZ_ASSERT(size <= mf->params.max_window_size);
+
+       /* Start over at the beginning of the new window.  */
+       mf->cur_window_pos = 0;
+       mf->cur_window_size = size;
+       mf->cur_window = window;
+
+       /* Let the algorithm build whatever it needs for this window.  */
+       mf->ops.load_window(mf, window, size);
+}
+
+/*
+ * Get the list of matches at the next position in the window.
+ *
+ * @mf
+ *     A match-finder that has had a window loaded with lz_mf_load_window().
+ * @matches
+ *     Output array for the matches.  The number of matches returned will not
+ *     exceed the minimum of @max_search_depth and (@len_limit -
+ *     @min_match_len + 1), where @len_limit is min(@max_match_len,
+ *     @nice_match_len).
+ *
+ * Returns the number of matches stored in @matches, which may be 0.  Matches
+ * are ordered by strictly increasing length and strictly increasing offset;
+ * every length lies in [@min_match_len, @max_match_len].
+ *
+ * The match-finder is advanced to the next window position on return.
+ *
+ * Note: in non-debug mode the inline definition is used instead.  The two are
+ * equivalent, except that this non-inline version also validates the results,
+ * to help debug match-finding algorithms.
+ */
+#ifdef ENABLE_LZ_DEBUG
+u32
+lz_mf_get_matches(struct lz_mf *mf, struct lz_match *matches)
+{
+       LZ_ASSERT(mf->cur_window_pos < mf->cur_window_size);
+
+       /* Capture the state before the algorithm advances the position.  */
+       const u32 pos = mf->cur_window_pos;
+       const u32 max_len = min(mf->params.max_match_len,
+                               lz_mf_get_bytes_remaining(mf));
+       const u8 * const cur = lz_mf_get_window_ptr(mf);
+
+       const u32 count = mf->ops.get_matches(mf, matches);
+
+       /* The algorithm must have advanced by exactly one position.  */
+       LZ_ASSERT(mf->cur_window_pos == pos + 1);
+
+#if 0
+       fprintf(stderr, "Pos %"PRIu32"/%"PRIu32": %"PRIu32" matches\n",
+               pos, mf->cur_window_size, count);
+       for (u32 k = 0; k < count; k++) {
+               fprintf(stderr, "\tLen %"PRIu32" Offset %"PRIu32"\n",
+                       matches[k].len, matches[k].offset);
+       }
+#endif
+
+       /* Check every returned match.  */
+       for (u32 k = 0; k < count; k++) {
+               const u32 match_len = matches[k].len;
+               const u32 match_offset = matches[k].offset;
+               const u8 *src;
+
+               /* Length within the permitted range?  */
+               LZ_ASSERT(match_len >= mf->params.min_match_len);
+               LZ_ASSERT(match_len <= max_len);
+
+               /* Offset refers to an earlier position in the window?  */
+               LZ_ASSERT(match_offset >= 1);
+               LZ_ASSERT(match_offset <= pos);
+
+               /* Strictly longer and farther than the previous match?  */
+               if (k > 0) {
+                       LZ_ASSERT(match_len > matches[k - 1].len);
+                       LZ_ASSERT(match_offset > matches[k - 1].offset);
+               }
+
+               /* The bytes really are equal?  */
+               src = cur - match_offset;
+               LZ_ASSERT(!memcmp(cur, src, match_len));
+
+               /* The match is maximal, i.e. cannot be extended?  */
+               LZ_ASSERT(match_len == max_len ||
+                         cur[match_len] != src[match_len]);
+       }
+
+       return count;
+}
+#endif /* ENABLE_LZ_DEBUG */
+
+/*
+ * Advance the match-finder by 'n' positions without retrieving matches.  This
+ * is faster than calling lz_mf_get_matches() once per position.
+ *
+ * 'n' must be greater than 0.
+ *
+ * Note: in non-debug mode the inline definition is used instead.  The two are
+ * equivalent, except that this non-inline version performs extra checks.
+ */
+#ifdef ENABLE_LZ_DEBUG
+void
+lz_mf_skip_positions(struct lz_mf *mf, const u32 n)
+{
+       /* Must skip at least one position, and not run off the window.  */
+       LZ_ASSERT(n > 0);
+       LZ_ASSERT(n <= lz_mf_get_bytes_remaining(mf));
+
+       const u32 start_pos = mf->cur_window_pos;
+
+       mf->ops.skip_positions(mf, n);
+
+       /* The algorithm must have advanced by exactly 'n' positions.  */
+       LZ_ASSERT(mf->cur_window_pos == start_pos + n);
+}
+#endif
+
+/*
+ * Destroy a match-finder, releasing all memory that was allocated by
+ * lz_mf_alloc().  Passing NULL is allowed and does nothing.
+ */
+void
+lz_mf_free(struct lz_mf *mf)
+{
+       if (!mf)
+               return;
+
+       /* Algorithm-specific allocations first, then the structure itself.  */
+       mf->ops.destroy(mf);
+#ifdef ENABLE_LZ_DEBUG
+       /* Debug builds wipe the structure before freeing it (presumably so
+        * that a stale use is easier to notice).  */
+       memset(mf, 0, mf->ops.struct_size);
+#endif
+       FREE(mf);
+}
diff --git a/src/lz_null.c b/src/lz_null.c

new file mode 100644 (file)

index 0000000..474ab13
--- /dev/null
+++ b/src/lz_null.c
@@ -0,0 +1,94 @@
+/*
+ * lz_null.c
+ *
+ * Dummy "match-finder" for Lempel-Ziv compression.
+ *
+ * Copyright (c) 2014 Eric Biggers.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "wimlib/lz_mf.h"
+
+/* The dummy match-finder accepts any parameters.  */
+static bool
+lz_null_params_valid(const struct lz_mf_params *_params)
+{
+       (void)_params;  /* intentionally unused */
+       return true;
+}
+
+/* The dummy match-finder allocates nothing, whatever the window size.  */
+static u64
+lz_null_get_needed_memory(u32 max_window_size)
+{
+       (void)max_window_size;  /* intentionally unused */
+       return 0;
+}
+
+/*
+ * Initialize the dummy match-finder.  There is nothing to allocate; only the
+ * match length limits are given defaults when the caller left them as 0
+ * (minimum 2, maximum equal to the maximum window size).  Always succeeds.
+ */
+static bool
+lz_null_init(struct lz_mf *mf)
+{
+       struct lz_mf_params * const p = &mf->params;
+
+       if (!p->min_match_len)
+               p->min_match_len = 2;
+
+       if (!p->max_match_len)
+               p->max_match_len = p->max_window_size;
+
+       return true;
+}
+
+/* Nothing to build: the dummy match-finder never looks at the window.  */
+static void
+lz_null_load_window(struct lz_mf *mf, const u8 window[], u32 size)
+{
+       (void)mf;
+       (void)window;
+       (void)size;
+}
+
+/* Advance one position and report that no matches were found.  @matches is
+ * never written.  */
+static u32
+lz_null_get_matches(struct lz_mf *mf, struct lz_match matches[])
+{
+       (void)matches;  /* intentionally unused */
+       mf->cur_window_pos += 1;
+       return 0;
+}
+
+/* Skipping is just a matter of moving the current position forward by @n.  */
+static void
+lz_null_skip_positions(struct lz_mf *mf, u32 n)
+{
+       mf->cur_window_pos = mf->cur_window_pos + n;
+}
+
+/* Nothing to release: lz_null_init() allocates nothing.  */
+static void
+lz_null_destroy(struct lz_mf *mf)
+{
+       (void)mf;       /* intentionally unused */
+}
+
+const struct lz_mf_ops lz_null_ops = { /* referenced from mf_ops[] in lz_mf.c */
+       .params_valid      = lz_null_params_valid,
+       .get_needed_memory = lz_null_get_needed_memory,
+       .init              = lz_null_init,
+       .load_window       = lz_null_load_window,
+       .get_matches       = lz_null_get_matches,
+       .skip_positions    = lz_null_skip_positions,
+       .destroy           = lz_null_destroy,
+       .struct_size       = sizeof(struct lz_mf), /* base structure only */
+};
diff --git a/src/lz_suffix_array_utils.c b/src/lz_suffix_array_utils.c

new file mode 100644 (file)

index 0000000..f0b476c
--- /dev/null
+++ b/src/lz_suffix_array_utils.c
@@ -0,0 +1,193 @@
+/*
+ * lz_suffix_array_utils.c
+ *
+ * Common utilities for suffix-array based Lempel-Ziv match-finding algorithms.
+ *
+ * Copyright (c) 2014 Eric Biggers.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "wimlib/divsufsort.h"
+#include "wimlib/lz_mf.h"
+#include "wimlib/lz_suffix_array_utils.h"
+#include "wimlib/util.h"
+
+/* If ENABLE_LZ_DEBUG is defined, verify that the suffix array @SA of the
+ * @n-byte text @T satisfies its definition.  @tmp[0..n-1] is used as scratch.
+ *
+ * WARNING: this is for debug use only as it does not necessarily run in linear
+ * time!!!  */
+static void
+verify_SA(const u32 *SA, const u8 *T, u32 n, u32 *tmp)
+{
+#ifdef ENABLE_LZ_DEBUG
+       /* Ensure the SA contains exactly one of each i in [0, n - 1].  */
+       for (u32 i = 0; i < n; i++)
+               tmp[i] = 0;
+       for (u32 r = 0; r < n; r++) {
+               u32 i = SA[r];
+               LZ_ASSERT(i < n);
+               LZ_ASSERT(!tmp[i]);
+               tmp[i] = 1;
+       }
+
+       /* Ensure the suffix with rank r is lexicographically less than the
+        * suffix with rank (r + 1) for every adjacent pair of ranks.  */
+       for (u32 r = 0; r + 1 < n; r++) {
+               /* "r + 1 < n", not "r < n - 1": n - 1 wraps when n == 0.  */
+               u32 i1 = SA[r];
+               u32 i2 = SA[r + 1];
+
+               u32 n1 = n - i1;
+               u32 n2 = n - i2;
+
+               int res = memcmp(&T[i1], &T[i2], min(n1, n2));
+               LZ_ASSERT(res < 0 || (res == 0 && n1 < n2));
+       }
+#endif /* ENABLE_LZ_DEBUG  */
+}
+
+/*
+ * Build the suffix array (SA) for the specified "text".
+ *
+ * The SA is a sorted array of the text's suffixes, represented by indices into
+ * the text.  It can equivalently be viewed as a mapping from suffix rank to
+ * suffix position.
+ *
+ * To build the SA, we currently rely on libdivsufsort, which uses an
+ * induced-sorting-based algorithm.  In practice, this seems to be the fastest
+ * suffix array construction algorithm currently available.
+ *
+ * References:
+ *
+ *     Y. Mori.  libdivsufsort, a lightweight suffix-sorting library.
+ *     https://code.google.com/p/libdivsufsort/.
+ *
+ *     G. Nong, S. Zhang, and W.H. Chan.  2009.  Linear Suffix Array
+ *     Construction by Almost Pure Induced-Sorting.  Data Compression
+ *     Conference, 2009.  DCC '09.  pp. 193 - 202.
+ *
+ *     S.J. Puglisi, W.F. Smyth, and A. Turpin.  2007.  A Taxonomy of Suffix
+ *     Array Construction Algorithms.  ACM Computing Surveys (CSUR) Volume 39
+ *     Issue 2, 2007 Article No. 4.
+ */
+void
+build_SA(u32 *SA, const u8 *T, u32 n, u32 *tmp)
+{
+       BUILD_BUG_ON(BUILD_SA_MIN_TMP_LEN !=
+                    DIVSUFSORT_TMP1_LEN + DIVSUFSORT_TMP2_LEN);
+
+       /* Note: divsufsort() needs temporary space --- one array with 256
+        * spaces and one array with 65536 spaces.  The implementation of
+        * divsufsort() has been modified from the original to use the provided
+        * temporary space instead of allocating its own, since we don't want to
+        * have to deal with malloc() failures here.  */
+       divsufsort(T, SA, n, tmp, tmp + DIVSUFSORT_TMP1_LEN);
+       /* Debug-only check (empty unless ENABLE_LZ_DEBUG); it overwrites @tmp.  */
+       verify_SA(SA, T, n, tmp);
+}
+
+
+/* Build the inverse suffix array @ISA from the suffix array @SA in linear time.
+ *
+ * Whereas the suffix array is a mapping from suffix rank to suffix position,
+ * the inverse suffix array is a mapping from suffix position to suffix rank.
+ * One pass: for each rank r in [0, n), ISA[SA[r]] is set to r.  */
+void
+build_ISA(u32 * restrict ISA, const u32 * restrict SA, u32 n)
+{
+       for (u32 r = 0; r < n; r++)
+               ISA[SA[r]] = r;
+}
+
+/* If ENABLE_LZ_DEBUG is defined, verify that the LCP (Longest Common Prefix)
+ * array satisfies its definition.  Nothing is checked when n < 2.
+ *
+ * WARNING: this is for debug use only as it does not necessarily run in linear
+ * time!!!  */
+static void
+verify_LCP(const u32 *LCP, const u32 *SA, const u8 *T, u32 n)
+{
+#ifdef ENABLE_LZ_DEBUG
+       for (u32 r = 0; r + 1 < n; r++) {
+               u32 i1 = SA[r];
+               u32 i2 = SA[r + 1];
+               u32 lcp = LCP[r + 1];
+               /* "r + 1 < n", not "r < n - 1": n - 1 wraps when n == 0.  */
+               u32 n1 = n - i1;
+               u32 n2 = n - i2;
+
+               LZ_ASSERT(lcp <= min(n1, n2));
+               /* The first lcp bytes agree and the next byte, if any, differs.  */
+               LZ_ASSERT(memcmp(&T[i1], &T[i2], lcp) == 0);
+               if (lcp < min(n1, n2))
+                       LZ_ASSERT(T[i1 + lcp] != T[i2 + lcp]);
+       }
+#endif /* ENABLE_LZ_DEBUG */
+}
+
+/*
+ * Build the LCP (Longest Common Prefix) array in linear time.
+ *
+ * LCP[r] will be the length of the longest common prefix between the suffixes
+ * with positions SA[r - 1] and SA[r].  LCP[0] will be undefined.
+ *
+ * Algorithm taken from Kasai et al. (2001), but modified slightly to take into
+ * account that with bytes in the real world, there is no unique symbol at the
+ * end of the string.
+ *
+ * References:
+ *
+ *     Kasai et al.  2001.  Linear-Time Longest-Common-Prefix Computation in
+ *     Suffix Arrays and Its Applications.  CPM '01 Proceedings of the 12th
+ *     Annual Symposium on Combinatorial Pattern Matching pp. 181-192.
+ */
+void
+build_LCP(u32 * restrict LCP, const u32 * restrict SA,
+         const u32 * restrict ISA, const u8 * restrict T, u32 n)
+{
+       u32 h, i, r, j, lim;
+       /* h is the match length carried from one text position to the next.  */
+       h = 0;
+       for (i = 0; i < n; i++) {
+               r = ISA[i];
+               if (r > 0) {    /* rank 0 has no predecessor; LCP[0] is not written  */
+                       j = SA[r - 1];
+                       lim = min(n - i, n - j);
+                       /* Never compare past the end of either suffix.  */
+                       while (h < lim && T[i + h] == T[j + h])
+                               h++;
+                       LCP[r] = h;
+                       if (h > 0)
+                               h--;    /* the suffix at i + 1 drops one leading byte  */
+               }
+       }
+       /* Debug-only check (empty unless ENABLE_LZ_DEBUG is defined).  */
+       verify_LCP(LCP, SA, T, n);
+}
diff --git a/src/lzms-compress.c b/src/lzms-compress.c

index 0567ea392fd46ea9ad23ef9739aa619764f22a2a..f417fa526d93de2ee17cd36467fdab70735960f7 100644 (file)
--- a/src/lzms-compress.c
+++ b/src/lzms-compress.c
@@ -41,8 +41,7 @@
  #include "wimlib/compress_common.h"
  #include "wimlib/endianness.h"
  #include "wimlib/error.h"
-#include "wimlib/lz.h"
-#include "wimlib/lz_bt.h"
+#include "wimlib/lz_mf.h"
  #include "wimlib/lzms.h"
  #include "wimlib/util.h"
  
@@ -158,10 +157,15 @@ struct lzms_huffman_encoder {
         u32 codewords[LZMS_MAX_NUM_SYMS];
  };
  
+struct lzms_compressor_params {
+       u32 min_match_length;   /* given to the match-finder as min_match_len  */
+       u32 nice_match_length;  /* match-finder nice_match_len; also pads ctx->optimum  */
+       u32 max_search_depth;   /* match-finder search depth; sizes ctx->matches  */
+       u32 optim_array_length; /* base entry count of ctx->optimum  */
+};
+
  /* State of the LZMS compressor.  */
  struct lzms_compressor {
-       struct wimlib_lzms_compressor_params params;
-
         /* Pointer to a buffer holding the preprocessed data to compress.  */
         u8 *window;
  
@@ -171,8 +175,8 @@ struct lzms_compressor {
         /* Size of the data in @buffer.  */
         u32 window_size;
  
-       /* Binary tree match-finder.  */
-       struct lz_bt mf;
+       /* Lempel-Ziv match-finder.  */
+       struct lz_mf *mf;
  
         /* Temporary space to store found matches.  */
         struct lz_match *matches;
@@ -186,6 +190,9 @@ struct lzms_compressor {
          * allocated size of @window.  */
         u32 max_block_size;
  
+       /* Compression parameters.  */
+       struct lzms_compressor_params params;
+
         /* Raw range encoder which outputs to the beginning of the compressed
          * data buffer, proceeding forwards.  */
         struct lzms_range_encoder_raw rc;
@@ -747,13 +754,13 @@ static u32
  lzms_get_matches(struct lzms_compressor *ctx, struct lz_match **matches_ret)
  {
         *matches_ret = ctx->matches;
-       return lz_bt_get_matches(&ctx->mf, ctx->matches);
+       return lz_mf_get_matches(ctx->mf, ctx->matches);
  }
  
  static void
  lzms_skip_bytes(struct lzms_compressor *ctx, u32 n)
  {
-       lz_bt_skip_positions(&ctx->mf, n);
+       lz_mf_skip_positions(ctx->mf, n);
  }
  
  static u32
@@ -866,7 +873,7 @@ lzms_match_chooser_reverse_list(struct lzms_compressor *ctx, unsigned cur_pos)
                 };
  }
  
-/* This is similar to lzx_get_near_optimal_match() in lzx-compress.c.
+/* This is similar to lzx_choose_near_optimal_match() in lzx-compress.c.
   * Read that one if you want to understand it.  */
  static struct lz_match
  lzms_get_near_optimal_match(struct lzms_compressor *ctx)
@@ -894,12 +901,11 @@ lzms_get_near_optimal_match(struct lzms_compressor *ctx)
         ctx->optimum_end_idx = 0;
  
         longest_rep_len = ctx->params.min_match_length - 1;
-       if (lz_bt_get_position(&ctx->mf) >= LZMS_MAX_INIT_RECENT_OFFSET) {
-               u32 limit = min(ctx->params.max_match_length,
-                               lz_bt_get_remaining_size(&ctx->mf));
+       if (lz_mf_get_position(ctx->mf) >= LZMS_MAX_INIT_RECENT_OFFSET) {
+               u32 limit = lz_mf_get_bytes_remaining(ctx->mf);
                 for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++) {
                         u32 offset = ctx->lru.lz.recent_offsets[i];
-                       const u8 *strptr = lz_bt_get_window_ptr(&ctx->mf);
+                       const u8 *strptr = lz_mf_get_window_ptr(ctx->mf);
                         const u8 *matchptr = strptr - offset;
                         u32 len = 0;
                         while (len < limit && strptr[len] == matchptr[len])
@@ -941,7 +947,7 @@ lzms_get_near_optimal_match(struct lzms_compressor *ctx)
         ctx->optimum[1].state = initial_state;
         ctx->optimum[1].cost = lzms_get_literal_cost(ctx,
                                                      &ctx->optimum[1].state,
-                                                    *(lz_bt_get_window_ptr(&ctx->mf) - 1));
+                                                    *(lz_mf_get_window_ptr(ctx->mf) - 1));
         ctx->optimum[1].prev.link = 0;
  
         for (u32 i = 0, len = 2; i < num_matches; i++) {
@@ -998,12 +1004,11 @@ lzms_get_near_optimal_match(struct lzms_compressor *ctx)
                         return lzms_match_chooser_reverse_list(ctx, cur_pos);
  
                 longest_rep_len = ctx->params.min_match_length - 1;
-               if (lz_bt_get_position(&ctx->mf) >= LZMS_MAX_INIT_RECENT_OFFSET) {
-                       u32 limit = min(ctx->params.max_match_length,
-                                       lz_bt_get_remaining_size(&ctx->mf));
+               if (lz_mf_get_position(ctx->mf) >= LZMS_MAX_INIT_RECENT_OFFSET) {
+                       u32 limit = lz_mf_get_bytes_remaining(ctx->mf);
                         for (int i = 0; i < LZMS_NUM_RECENT_OFFSETS; i++) {
                                 u32 offset = ctx->optimum[cur_pos].state.lru.recent_offsets[i];
-                               const u8 *strptr = lz_bt_get_window_ptr(&ctx->mf);
+                               const u8 *strptr = lz_mf_get_window_ptr(ctx->mf);
                                 const u8 *matchptr = strptr - offset;
                                 u32 len = 0;
                                 while (len < limit && strptr[len] == matchptr[len])
@@ -1054,7 +1059,7 @@ lzms_get_near_optimal_match(struct lzms_compressor *ctx)
                 cost = ctx->optimum[cur_pos].cost +
                         lzms_get_literal_cost(ctx,
                                               &state,
-                                             *(lz_bt_get_window_ptr(&ctx->mf) - 1));
+                                             *(lz_mf_get_window_ptr(ctx->mf) - 1));
                 if (cost < ctx->optimum[cur_pos + 1].cost) {
                         ctx->optimum[cur_pos + 1].state = state;
                         ctx->optimum[cur_pos + 1].cost = cost;
@@ -1116,25 +1121,22 @@ lzms_get_near_optimal_match(struct lzms_compressor *ctx)
   *
   * Notes:
   *
- * - This uses near-optimal LZ parsing backed by a binary tree match-finder.
- *
   * - This does not output any delta matches.
   *
   * - The costs of literals and matches are estimated using the range encoder
   *   states and the semi-adaptive Huffman codes.  Except for range encoding
   *   states, costs are assumed to be constant throughout a single run of the
- *   parsing algorithm, which can parse up to @optim_array_length (from the
- *   `struct wimlib_lzms_compressor_params') bytes of data.  This introduces a
- *   source of inaccuracy because the probabilities and Huffman codes can change
- *   over this part of the data.
+ *   parsing algorithm, which can parse up to @optim_array_length bytes of data.
+ *   This introduces a source of inaccuracy because the probabilities and
+ *   Huffman codes can change over this part of the data.
   */
  static void
  lzms_encode(struct lzms_compressor *ctx)
  {
         struct lz_match match;
  
-       /* Load window into the binary tree match-finder.  */
-       lz_bt_load_window(&ctx->mf, ctx->window, ctx->window_size);
+       /* Load window into the match-finder.  */
+       lz_mf_load_window(ctx->mf, ctx->window, ctx->window_size);
  
         /* Reset the match-chooser.  */
         ctx->optimum_cur_idx = 0;
@@ -1302,6 +1304,107 @@ lzms_finalize(struct lzms_compressor *ctx, u8 *cdata, size_t csize_avail)
         return compressed_size;
  }
  
+/* Derive LZMS parameters from @compression_level (50 => 2, 32, 50, 1024).  */
+static void
+lzms_build_params(unsigned int compression_level,
+                 struct lzms_compressor_params *lzms_params)
+{
+       lzms_params->min_match_length  = (compression_level >= 50) ? 2 : 3;
+       lzms_params->nice_match_length = ((u64)compression_level * 32) / 50;
+       lzms_params->max_search_depth  = ((u64)compression_level * 50) / 50;
+       lzms_params->optim_array_length = 224 + compression_level * 16;
+}
+
+static void
+lzms_build_mf_params(const struct lzms_compressor_params *lzms_params,
+                    u32 max_window_size, struct lz_mf_params *mf_params)
+{
+       memset(mf_params, 0, sizeof(*mf_params));
+       /* Members not assigned below keep the 0 written by memset().  */
+       mf_params->algorithm = LZ_MF_DEFAULT;
+       mf_params->max_window_size = max_window_size;
+       mf_params->min_match_len = lzms_params->min_match_length;
+       mf_params->max_search_depth = lzms_params->max_search_depth;
+       mf_params->nice_match_len = lzms_params->nice_match_length;
+}
+
+static void
+lzms_free_compressor(void *_ctx);
+
+static u64
+lzms_get_needed_memory(size_t max_block_size, unsigned int compression_level)
+{
+       struct lzms_compressor_params params;
+
+       lzms_build_params(compression_level, &params);
+       /* One term per allocation made by lzms_create_compressor().  */
+       u64 size = 0;
+
+       size += sizeof(struct lzms_compressor);
+       size += max_block_size;         /* ctx->window  */
+       size += lz_mf_get_needed_memory(LZ_MF_DEFAULT, max_block_size);
+       size += params.max_search_depth * sizeof(struct lz_match);
+       size += (params.optim_array_length + params.nice_match_length) *
+               sizeof(struct lzms_mc_pos_data);        /* ctx->optimum  */
+
+       return size;
+}
+
+static int
+lzms_create_compressor(size_t max_block_size, unsigned int compression_level,
+                      void **ctx_ret)
+{
+       struct lzms_compressor *ctx;
+       struct lzms_compressor_params params;
+       struct lz_mf_params mf_params;
+       /* Nothing is allocated until every parameter check has passed.  */
+       if (max_block_size >= INT32_MAX)
+               return WIMLIB_ERR_INVALID_PARAM;
+
+       lzms_build_params(compression_level, &params);
+       lzms_build_mf_params(&params, max_block_size, &mf_params);
+       if (!lz_mf_params_valid(&mf_params))
+               return WIMLIB_ERR_INVALID_PARAM;
+       /* Assuming CALLOC zero-fills like calloc(), members start out NULL.  */
+       ctx = CALLOC(1, sizeof(struct lzms_compressor));
+       if (!ctx)
+               goto oom;
+
+       ctx->params = params;
+       ctx->max_block_size = max_block_size;
+
+       ctx->window = MALLOC(max_block_size);
+       if (!ctx->window)
+               goto oom;
+
+       ctx->mf = lz_mf_alloc(&mf_params);
+       if (!ctx->mf)
+               goto oom;
+
+       ctx->matches = MALLOC(params.max_search_depth * sizeof(struct lz_match));
+       if (!ctx->matches)
+               goto oom;
+
+       ctx->optimum = MALLOC((params.optim_array_length +
+                              params.nice_match_length) *
+                               sizeof(struct lzms_mc_pos_data));
+       if (!ctx->optimum)
+               goto oom;
+
+       /* Initialize position and length slot data if not done already.  */
+       lzms_init_slots();
+
+       /* Initialize range encoding cost table if not done already.  */
+       lzms_init_rc_costs();
+
+       *ctx_ret = ctx;
+       return 0;
+
+oom:
+       lzms_free_compressor(ctx);      /* ctx is NULL or only partly populated here  */
+       return WIMLIB_ERR_NOMEM;
+}
+
  static size_t
  lzms_compress(const void *uncompressed_data, size_t uncompressed_size,
               void *compressed_data, size_t compressed_size_avail, void *_ctx)
@@ -1312,15 +1415,6 @@ lzms_compress(const void *uncompressed_data, size_t uncompressed_size,
         LZMS_DEBUG("uncompressed_size=%zu, compressed_size_avail=%zu",
                    uncompressed_size, compressed_size_avail);
  
-       /* Make sure the uncompressed size is compatible with this compressor.
-        */
-       if (uncompressed_size > ctx->max_block_size) {
-               LZMS_DEBUG("Can't compress %zu bytes: LZMS context "
-                          "only supports %u bytes",
-                          uncompressed_size, ctx->max_block_size);
-               return 0;
-       }
-
         /* Don't bother compressing extremely small inputs.  */
         if (uncompressed_size < 4) {
                 LZMS_DEBUG("Input too small to bother compressing.");
@@ -1358,47 +1452,6 @@ lzms_compress(const void *uncompressed_data, size_t uncompressed_size,
         LZMS_DEBUG("Compressed %zu => %zu bytes",
                    uncompressed_size, compressed_size);
  
-#if defined(ENABLE_VERIFY_COMPRESSION) || defined(ENABLE_LZMS_DEBUG)
-       /* Verify that we really get the same thing back when decompressing.  */
-       {
-               struct wimlib_decompressor *decompressor;
-
-               LZMS_DEBUG("Verifying LZMS compression.");
-
-               if (0 == wimlib_create_decompressor(WIMLIB_COMPRESSION_TYPE_LZMS,
-                                                   ctx->max_block_size,
-                                                   NULL,
-                                                   &decompressor))
-               {
-                       int ret;
-                       ret = wimlib_decompress(compressed_data,
-                                               compressed_size,
-                                               ctx->window,
-                                               uncompressed_size,
-                                               decompressor);
-                       wimlib_free_decompressor(decompressor);
-
-                       if (ret) {
-                               ERROR("Failed to decompress data we "
-                                     "compressed using LZMS algorithm");
-                               wimlib_assert(0);
-                               return 0;
-                       }
-                       if (memcmp(uncompressed_data, ctx->window,
-                                  uncompressed_size))
-                       {
-                               ERROR("Data we compressed using LZMS algorithm "
-                                     "didn't decompress to original");
-                               wimlib_assert(0);
-                               return 0;
-                       }
-               } else {
-                       WARNING("Failed to create decompressor for "
-                               "data verification!");
-               }
-       }
-#endif /* ENABLE_LZMS_DEBUG || ENABLE_VERIFY_COMPRESSION  */
-
         return compressed_size;
  }
  
@@ -1409,140 +1462,14 @@ lzms_free_compressor(void *_ctx)
  
         if (ctx) {
                 FREE(ctx->window);
+               lz_mf_free(ctx->mf);
                 FREE(ctx->matches);
-               lz_bt_destroy(&ctx->mf);
                 FREE(ctx->optimum);
                 FREE(ctx);
         }
  }
  
-static const struct wimlib_lzms_compressor_params lzms_default = {
-       .hdr = {
-               .size = sizeof(struct wimlib_lzms_compressor_params),
-       },
-       .min_match_length = 2,
-       .max_match_length = UINT32_MAX,
-       .nice_match_length = 32,
-       .max_search_depth = 50,
-       .optim_array_length = 1024,
-};
-
-static bool
-lzms_params_valid(const struct wimlib_compressor_params_header *);
-
-static const struct wimlib_lzms_compressor_params *
-lzms_get_params(const struct wimlib_compressor_params_header *_params)
-{
-       const struct wimlib_lzms_compressor_params *params =
-               (const struct wimlib_lzms_compressor_params*)_params;
-
-       if (params == NULL)
-               params = &lzms_default;
-
-       LZMS_ASSERT(lzms_params_valid(&params->hdr));
-
-       return params;
-}
-
-static int
-lzms_create_compressor(size_t max_block_size,
-                      const struct wimlib_compressor_params_header *_params,
-                      void **ctx_ret)
-{
-       struct lzms_compressor *ctx;
-       const struct wimlib_lzms_compressor_params *params = lzms_get_params(_params);
-
-       if (max_block_size == 0 || max_block_size >= INT32_MAX) {
-               LZMS_DEBUG("Invalid max_block_size (%u)", max_block_size);
-               return WIMLIB_ERR_INVALID_PARAM;
-       }
-
-       ctx = CALLOC(1, sizeof(struct lzms_compressor));
-       if (ctx == NULL)
-               goto oom;
-
-       ctx->window = MALLOC(max_block_size);
-       if (ctx->window == NULL)
-               goto oom;
-
-       ctx->matches = MALLOC(min(params->max_match_length -
-                                       params->min_match_length + 1,
-                                 params->max_search_depth + 2) *
-                               sizeof(ctx->matches[0]));
-       if (ctx->matches == NULL)
-               goto oom;
-
-       if (!lz_bt_init(&ctx->mf,
-                       max_block_size,
-                       params->min_match_length,
-                       params->max_match_length,
-                       params->nice_match_length,
-                       params->max_search_depth))
-               goto oom;
-
-       ctx->optimum = MALLOC((params->optim_array_length +
-                              min(params->nice_match_length,
-                                  params->max_match_length)) *
-                                       sizeof(ctx->optimum[0]));
-       if (!ctx->optimum)
-               goto oom;
-
-       /* Initialize position and length slot data if not done already.  */
-       lzms_init_slots();
-
-       /* Initialize range encoding cost table if not done already.  */
-       lzms_init_rc_costs();
-
-       ctx->max_block_size = max_block_size;
-       memcpy(&ctx->params, params, sizeof(*params));
-
-       *ctx_ret = ctx;
-       return 0;
-
-oom:
-       lzms_free_compressor(ctx);
-       return WIMLIB_ERR_NOMEM;
-}
-
-static u64
-lzms_get_needed_memory(size_t max_block_size,
-                      const struct wimlib_compressor_params_header *_params)
-{
-       const struct wimlib_lzms_compressor_params *params = lzms_get_params(_params);
-
-       u64 size = 0;
-
-       size += max_block_size;
-       size += sizeof(struct lzms_compressor);
-       size += lz_bt_get_needed_memory(max_block_size);
-       size += (params->optim_array_length +
-                min(params->nice_match_length,
-                    params->max_match_length)) *
-                        sizeof(((struct lzms_compressor *)0)->optimum[0]);
-       size += min(params->max_match_length - params->min_match_length + 1,
-                   params->max_search_depth + 2) *
-               sizeof(((struct lzms_compressor*)0)->matches[0]);
-       return size;
-}
-
-static bool
-lzms_params_valid(const struct wimlib_compressor_params_header *_params)
-{
-       const struct wimlib_lzms_compressor_params *params =
-               (const struct wimlib_lzms_compressor_params*)_params;
-
-       if (params->hdr.size != sizeof(*params) ||
-           params->max_match_length < params->min_match_length ||
-           params->min_match_length < 2 ||
-           params->optim_array_length == 0 ||
-           min(params->max_match_length, params->nice_match_length) > 65536)
-               return false;
-
-       return true;
-}
-
  const struct compressor_ops lzms_compressor_ops = {
-       .params_valid       = lzms_params_valid,
         .get_needed_memory  = lzms_get_needed_memory,
         .create_compressor  = lzms_create_compressor,
         .compress           = lzms_compress,
diff --git a/src/lzms-decompress.c b/src/lzms-decompress.c

index e68a97e26c85d70bfec76943e57719716a30d369..f8bfcdb17344b3055d343e7be7615897484d7bfc 100644 (file)
--- a/src/lzms-decompress.c
+++ b/src/lzms-decompress.c
@@ -1044,9 +1044,7 @@ lzms_free_decompressor(void *_ctx)
  }
  
  static int
-lzms_create_decompressor(size_t max_block_size,
-                        const struct wimlib_decompressor_params_header *params,
-                        void **ctx_ret)
+lzms_create_decompressor(size_t max_block_size, void **ctx_ret)
  {
         struct lzms_decompressor *ctx;
  
diff --git a/src/lzx-compress.c b/src/lzx-compress.c

index a51cfb1b15f7d82c3d5bac490beb204ccb3cb13f..7510457488b5025be14faf52ce6c8210e1410e49 100644 (file)
--- a/src/lzx-compress.c
+++ b/src/lzx-compress.c
@@ -1,5 +1,7 @@
  /*
   * lzx-compress.c
+ *
+ * A compressor that produces output compatible with the LZX compression format.
   */
  
  /*
@@ -109,44 +111,14 @@
   * on offsets is that LZX does not allow the last 2 bytes of the window to match
   * the the beginning of the window.
   *
- * Depending on how good a compression ratio we want (see the "Match-choosing"
- * section), we may want to find: (a) all matches, or (b) just the longest
- * match, or (c) just some "promising" matches that we are able to find quickly,
- * or (d) just the longest match that we're able to find quickly.  Below we
- * introduce the match-finding methods that the code currently uses or has
- * previously used:
- *
- * - Hash chains.  Maintain a table that maps hash codes, computed from
- *   fixed-length byte sequences, to linked lists containing previous window
- *   positions.  To search for matches, compute the hash for the current
- *   position in the window and search the appropriate hash chain.  When
- *   advancing to the next position, prepend the current position to the
- *   appropriate hash list.  This is a good approach for producing matches with
- *   stategy (d) and is useful for fast compression.  Therefore, we provide an
- *   option to use this method for LZX compression.  See lz_hash.c for the
- *   implementation.
- *
- * - Binary trees.  Similar to hash chains, but each hash bucket contains a
- *   binary tree of previous window positions rather than a linked list.  This
- *   is a good approach for producing matches with stategy (c) and is useful for
- *   achieving a good compression ratio.  Therefore, we provide an option to use
- *   this method; see lz_bt.c for the implementation.
- *
- * - Suffix arrays.  This code previously used this method to produce matches
- *   with stategy (c), but I've dropped it because it was slower than the binary
- *   trees approach, used more memory, and did not improve the compression ratio
- *   enough to compensate.  Download wimlib v1.6.2 if you want the code.
- *   However, the suffix array method was basically as follows.  Build the
- *   suffix array for the entire window.  The suffix array contains each
- *   possible window position, sorted by the lexicographic order of the strings
- *   that begin at those positions.  Find the matches at a given position by
- *   searching the suffix array outwards, in both directions, from the suffix
- *   array slot for that position.  This produces the longest matches first, but
- *   "matches" that actually occur at later positions in the window must be
- *   skipped.  To do this skipping, use an auxiliary array with dynamically
- *   constructed linked lists.  Also, use the inverse suffix array to quickly
- *   find the suffix array slot for a given position without doing a binary
- *   search.
+ * There are a number of algorithms that can be used for this, including hash
+ * chains, binary trees, and suffix arrays.  Binary trees generally work well
+ * for LZX compression since it uses medium-size windows (2^15 to 2^21 bytes).
+ * However, when compressing in a fast mode where many positions are skipped
+ * (not searched for matches), hash chains are faster.
+ *
+ * Since the match-finders are not specific to LZX, I will not explain them in
+ * detail here.  Instead, see lz_hash_chains.c and lz_binary_trees.c.
   *
   * ----------------------------------------------------------------------------
   *
@@ -216,46 +188,29 @@
   * for example.  Therefore, for fast compression we combine lazy parsing with
   * the hash chain max-finder.  For normal/high compression we combine
   * near-optimal parsing with the binary tree match-finder.
- *
- * Anyway, if you've read through this comment, you hopefully should have a
- * better idea of why things are done in a certain way in this LZX compressor,
- * as well as in other compressors for LZ77-based formats (including third-party
- * ones).  In my opinion, the phrase "compression algorithm" is often mis-used
- * in place of "compression format",  since there can be many different
- * algorithms that all generate compressed data in the same format.  The
- * challenge is to design an algorithm that is efficient but still gives a good
- * compression ratio.
   */
  
  #ifdef HAVE_CONFIG_H
  #  include "config.h"
  #endif
  
-#include "wimlib.h"
  #include "wimlib/compressor_ops.h"
  #include "wimlib/compress_common.h"
-#include "wimlib/endianness.h"
  #include "wimlib/error.h"
-#include "wimlib/lz.h"
-#include "wimlib/lz_hash.h"
-#include "wimlib/lz_bt.h"
+#include "wimlib/lz_mf.h"
  #include "wimlib/lzx.h"
  #include "wimlib/util.h"
  #include <string.h>
  
-#ifdef ENABLE_LZX_DEBUG
-#  include "wimlib/decompress_common.h"
-#endif
-
-#define LZX_OPTIM_ARRAY_SIZE   4096
+#define LZX_OPTIM_ARRAY_LENGTH 4096
  
  #define LZX_DIV_BLOCK_SIZE     32768
  
  #define LZX_CACHE_PER_POS      8
  
+#define LZX_MAX_MATCHES_PER_POS        (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)
+
  #define LZX_CACHE_LEN (LZX_DIV_BLOCK_SIZE * (LZX_CACHE_PER_POS + 1))
-#define LZX_CACHE_SIZE (LZX_CACHE_LEN * sizeof(struct lz_match))
-#define LZX_MAX_MATCHES_PER_POS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)
  
  /* Codewords for the LZX main, length, and aligned offset Huffman codes  */
  struct lzx_codewords {
@@ -340,12 +295,20 @@ struct lzx_block_spec {
         struct lzx_codes codes;
  };
  
+struct lzx_compressor;
+
+struct lzx_compressor_params {
+       struct lz_match (*choose_item_func)(struct lzx_compressor *);
+       enum lz_mf_algo mf_algo;
+       u32 num_optim_passes;
+       u32 min_match_length;
+       u32 nice_match_length;
+       u32 max_search_depth;
+};
+
  /* State of the LZX compressor.  */
  struct lzx_compressor {
  
-       /* The parameters that were used to create the compressor.  */
-       struct wimlib_lzx_compressor_params params;
-
         /* The buffer of data to be compressed.
          *
          * 0xe8 byte preprocessing is done directly on the data here before
@@ -354,20 +317,22 @@ struct lzx_compressor {
          * Note that this compressor does *not* use a real sliding window!!!!
          * It's not needed in the WIM format, since every chunk is compressed
          * independently.  This is by design, to allow random access to the
-        * chunks.
-        *
-        * We reserve a few extra bytes to potentially allow reading off the end
-        * of the array in the match-finding code for optimization purposes
-        * (currently only needed for the hash chain match-finder).  */
-       u8 *window;
+        * chunks.  */
+       u8 *cur_window;
  
         /* Number of bytes of data to be compressed, which is the number of
-        * bytes of data in @window that are actually valid.  */
-       u32 window_size;
+        * bytes of data in @cur_window that are actually valid.  */
+       u32 cur_window_size;
  
-       /* Allocated size of the @window.  */
+       /* Allocated size of @cur_window.  */
         u32 max_window_size;
  
+       /* Compression parameters.  */
+       struct lzx_compressor_params params;
+
+       unsigned (*get_matches_func)(struct lzx_compressor *, const struct lz_match **);
+       void (*skip_bytes_func)(struct lzx_compressor *, unsigned n);
+
         /* Number of symbols in the main alphabet (depends on the
          * @max_window_size since it determines the maximum allowed offset).  */
         unsigned num_main_syms;
@@ -396,11 +361,8 @@ struct lzx_compressor {
         /* The current cost model.  */
         struct lzx_costs costs;
  
-       /* Fast algorithm only:  Array of hash table links.  */
-       u32 *prev_tab;
-
-       /* Slow algorithm only: Binary tree match-finder.  */
-       struct lz_bt mf;
+       /* Lempel-Ziv match-finder.  */
+       struct lz_mf *mf;
  
         /* Position in window of next match to return.  */
         u32 match_window_pos;
@@ -409,17 +371,18 @@ struct lzx_compressor {
          * position.  */
         u32 match_window_end;
  
-       /* Matches found by the match-finder are cached in the following array
-        * to achieve a slight speedup when the same matches are needed on
+       /* When doing more than one match-choosing pass over the data, matches
+        * found by the match-finder are cached in the following array to
+        * achieve a slight speedup when the same matches are needed on
          * subsequent passes.  This is suboptimal because different matches may
          * be preferred with different cost models, but seems to be a worthwhile
          * speedup.  */
         struct lz_match *cached_matches;
         struct lz_match *cache_ptr;
-       bool matches_cached;
         struct lz_match *cache_limit;
  
-       /* Match-chooser state.
+       /* Match-chooser state, used when doing near-optimal parsing.
+        *
          * When matches have been chosen, optimum_cur_idx is set to the position
          * in the window of the next match/literal to return and optimum_end_idx
          * is set to the position in the window at the end of the last
@@ -427,6 +390,9 @@ struct lzx_compressor {
         struct lzx_mc_pos_data *optimum;
         unsigned optimum_cur_idx;
         unsigned optimum_end_idx;
+
+       /* Previous match, used when doing lazy parsing.  */
+       struct lz_match prev_match;
  };
  
  /*
@@ -896,43 +862,6 @@ lzx_write_items(struct output_bitstream *ostream, int block_type,
         }
  }
  
-static void
-lzx_assert_codes_valid(const struct lzx_codes * codes, unsigned num_main_syms)
-{
-#ifdef ENABLE_LZX_DEBUG
-       unsigned i;
-
-       for (i = 0; i < num_main_syms; i++)
-               LZX_ASSERT(codes->lens.main[i] <= LZX_MAX_MAIN_CODEWORD_LEN);
-
-       for (i = 0; i < LZX_LENCODE_NUM_SYMBOLS; i++)
-               LZX_ASSERT(codes->lens.len[i] <= LZX_MAX_LEN_CODEWORD_LEN);
-
-       for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++)
-               LZX_ASSERT(codes->lens.aligned[i] <= LZX_MAX_ALIGNED_CODEWORD_LEN);
-
-       const unsigned tablebits = 10;
-       u16 decode_table[(1 << tablebits) +
-                        (2 * max(num_main_syms, LZX_LENCODE_NUM_SYMBOLS))]
-                        _aligned_attribute(DECODE_TABLE_ALIGNMENT);
-       LZX_ASSERT(0 == make_huffman_decode_table(decode_table,
-                                                 num_main_syms,
-                                                 min(tablebits, LZX_MAINCODE_TABLEBITS),
-                                                 codes->lens.main,
-                                                 LZX_MAX_MAIN_CODEWORD_LEN));
-       LZX_ASSERT(0 == make_huffman_decode_table(decode_table,
-                                                 LZX_LENCODE_NUM_SYMBOLS,
-                                                 min(tablebits, LZX_LENCODE_TABLEBITS),
-                                                 codes->lens.len,
-                                                 LZX_MAX_LEN_CODEWORD_LEN));
-       LZX_ASSERT(0 == make_huffman_decode_table(decode_table,
-                                                 LZX_ALIGNEDCODE_NUM_SYMBOLS,
-                                                 min(tablebits, LZX_ALIGNEDCODE_TABLEBITS),
-                                                 codes->lens.aligned,
-                                                 LZX_MAX_ALIGNED_CODEWORD_LEN));
-#endif /* ENABLE_LZX_DEBUG */
-}
-
  /* Write an LZX aligned offset or verbatim block to the output.  */
  static void
  lzx_write_compressed_block(int block_type,
@@ -949,7 +878,6 @@ lzx_write_compressed_block(int block_type,
  
         LZX_ASSERT(block_type == LZX_BLOCKTYPE_ALIGNED ||
                    block_type == LZX_BLOCKTYPE_VERBATIM);
-       lzx_assert_codes_valid(codes, num_main_syms);
  
         /* The first three bits indicate the type of block and are one of the
          * LZX_BLOCKTYPE_* constants.  */
@@ -990,8 +918,6 @@ lzx_write_compressed_block(int block_type,
                         bitstream_put_bits(ostream, codes->lens.aligned[i],
                                            LZX_ALIGNEDCODE_ELEMENT_SIZE);
  
-       LZX_DEBUG("Writing main code...");
-
         /* Write the precode and lengths for the first LZX_NUM_CHARS symbols in
          * the main code, which are the codewords for literal bytes.  */
         lzx_write_compressed_code(ostream,
@@ -1006,41 +932,35 @@ lzx_write_compressed_block(int block_type,
                                   prev_codes->lens.main + LZX_NUM_CHARS,
                                   num_main_syms - LZX_NUM_CHARS);
  
-       LZX_DEBUG("Writing length code...");
-
         /* Write the precode and lengths for the length code.  */
         lzx_write_compressed_code(ostream,
                                   codes->lens.len,
                                   prev_codes->lens.len,
                                   LZX_LENCODE_NUM_SYMBOLS);
  
-       LZX_DEBUG("Writing matches and literals...");
-
         /* Write the actual matches and literals.  */
         lzx_write_items(ostream, block_type,
                         chosen_items, num_chosen_items, codes);
-
-       LZX_DEBUG("Done writing block.");
  }
  
  /* Write out the LZX blocks that were computed.  */
  static void
-lzx_write_all_blocks(struct lzx_compressor *ctx, struct output_bitstream *ostream)
+lzx_write_all_blocks(struct lzx_compressor *c, struct output_bitstream *ostream)
  {
  
-       const struct lzx_codes *prev_codes = &ctx->zero_codes;
-       for (unsigned i = 0; i < ctx->num_blocks; i++) {
-               const struct lzx_block_spec *spec = &ctx->block_specs[i];
+       const struct lzx_codes *prev_codes = &c->zero_codes;
+       for (unsigned i = 0; i < c->num_blocks; i++) {
+               const struct lzx_block_spec *spec = &c->block_specs[i];
  
                 LZX_DEBUG("Writing block %u/%u (type=%d, size=%u, num_chosen_items=%u)...",
-                         i + 1, ctx->num_blocks,
+                         i + 1, c->num_blocks,
                           spec->block_type, spec->block_size,
                           spec->num_chosen_items);
  
                 lzx_write_compressed_block(spec->block_type,
                                            spec->block_size,
-                                          ctx->max_window_size,
-                                          ctx->num_main_syms,
+                                          c->max_window_size,
+                                          c->num_main_syms,
                                            spec->chosen_items,
                                            spec->num_chosen_items,
                                            &spec->codes,
@@ -1131,28 +1051,6 @@ lzx_tally_match(unsigned match_len, u32 match_offset,
                 (adjusted_match_len);
  }
  
-struct lzx_record_ctx {
-       struct lzx_freqs freqs;
-       struct lzx_lru_queue queue;
-       struct lzx_item *items;
-};
-
-static void
-lzx_record_match(unsigned len, unsigned offset, void *_ctx)
-{
-       struct lzx_record_ctx *ctx = _ctx;
-
-       (ctx->items++)->data = lzx_tally_match(len, offset, &ctx->freqs, &ctx->queue);
-}
-
-static void
-lzx_record_literal(u8 lit, void *_ctx)
-{
-       struct lzx_record_ctx *ctx = _ctx;
-
-       (ctx->items++)->data = lzx_tally_literal(lit, &ctx->freqs);
-}
-
  /* Returns the cost, in bits, to output a literal byte using the specified cost
   * model.  */
  static u32
@@ -1200,7 +1098,8 @@ lzx_match_cost(unsigned length, u32 offset, const struct lzx_costs *costs,
  
  }
  
-/* Set the cost model @ctx->costs from the Huffman codeword lengths specified in
+
+/* Set the cost model @c->costs from the Huffman codeword lengths specified in
   * @lens.
   *
   * The cost model and codeword lengths are almost the same thing, but the
@@ -1210,67 +1109,35 @@ lzx_match_cost(unsigned length, u32 offset, const struct lzx_costs *costs,
   * length) to take into account the fact that uses of these symbols are expected
   * to be rare.  */
  static void
-lzx_set_costs(struct lzx_compressor * ctx, const struct lzx_lens * lens)
+lzx_set_costs(struct lzx_compressor *c, const struct lzx_lens * lens,
+             unsigned nostat)
  {
         unsigned i;
-       unsigned num_main_syms = ctx->num_main_syms;
  
         /* Main code  */
-       for (i = 0; i < num_main_syms; i++) {
-               ctx->costs.main[i] = lens->main[i];
-               if (ctx->costs.main[i] == 0)
-                       ctx->costs.main[i] = ctx->params.alg_params.slow.main_nostat_cost;
-       }
+       for (i = 0; i < c->num_main_syms; i++)
+               c->costs.main[i] = lens->main[i] ? lens->main[i] : nostat;
  
         /* Length code  */
-       for (i = 0; i < LZX_LENCODE_NUM_SYMBOLS; i++) {
-               ctx->costs.len[i] = lens->len[i];
-               if (ctx->costs.len[i] == 0)
-                       ctx->costs.len[i] = ctx->params.alg_params.slow.len_nostat_cost;
-       }
+       for (i = 0; i < LZX_LENCODE_NUM_SYMBOLS; i++)
+               c->costs.len[i] = lens->len[i] ? lens->len[i] : nostat;
  
         /* Aligned offset code  */
-       for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
-               ctx->costs.aligned[i] = lens->aligned[i];
-               if (ctx->costs.aligned[i] == 0)
-                       ctx->costs.aligned[i] = ctx->params.alg_params.slow.aligned_nostat_cost;
-       }
+       for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++)
+               c->costs.aligned[i] = lens->aligned[i] ? lens->aligned[i] : nostat / 2;
  }
  
-/* Retrieve a list of matches available at the next position in the input.
- *
- * A pointer to the matches array is written into @matches_ret, and the return
- * value is the number of matches found.  */
-static unsigned
-lzx_get_matches(struct lzx_compressor *ctx,
-               const struct lz_match **matches_ret)
+/* Don't allow matches to span the end of an LZX block.  */
+static inline u32
+maybe_truncate_matches(struct lz_match matches[], u32 num_matches,
+                      struct lzx_compressor *c)
  {
-       struct lz_match *cache_ptr;
-       struct lz_match *matches;
-       unsigned num_matches;
-
-       LZX_ASSERT(ctx->match_window_pos < ctx->match_window_end);
-
-       cache_ptr = ctx->cache_ptr;
-       matches = cache_ptr + 1;
-       if (likely(cache_ptr <= ctx->cache_limit)) {
-               if (ctx->matches_cached) {
-                       num_matches = cache_ptr->len;
-               } else {
-                       num_matches = lz_bt_get_matches(&ctx->mf, matches);
-                       cache_ptr->len = num_matches;
-               }
-       } else {
-               num_matches = 0;
-       }
-
-       /* Don't allow matches to span the end of an LZX block.  */
-       if (ctx->match_window_end < ctx->window_size && num_matches != 0) {
-               unsigned limit = ctx->match_window_end - ctx->match_window_pos;
+       if (c->match_window_end < c->cur_window_size && num_matches != 0) {
+               u32 limit = c->match_window_end - c->match_window_pos;
  
                 if (limit >= LZX_MIN_MATCH_LEN) {
  
-                       unsigned i = num_matches - 1;
+                       u32 i = num_matches - 1;
                         do {
                                 if (matches[i].len >= limit) {
                                         matches[i].len = limit;
@@ -1284,58 +1151,196 @@ lzx_get_matches(struct lzx_compressor *ctx,
                 } else {
                         num_matches = 0;
                 }
-               cache_ptr->len = num_matches;
         }
+       return num_matches;
+}
  
-#if 0
-       fprintf(stderr, "Pos %u/%u: %u matches\n",
-               ctx->match_window_pos, ctx->window_size, num_matches);
-       for (unsigned i = 0; i < num_matches; i++)
-               fprintf(stderr, "\tLen %u Offset %u\n", matches[i].len, matches[i].offset);
-#endif
+static unsigned
+lzx_get_matches_fillcache_singleblock(struct lzx_compressor *c,
+                                     const struct lz_match **matches_ret)
+{
+       struct lz_match *cache_ptr;
+       struct lz_match *matches;
+       unsigned num_matches;
  
-#ifdef ENABLE_LZX_DEBUG
-       for (unsigned i = 0; i < num_matches; i++) {
-               LZX_ASSERT(matches[i].len >= LZX_MIN_MATCH_LEN);
-               LZX_ASSERT(matches[i].len <= LZX_MAX_MATCH_LEN);
-               LZX_ASSERT(matches[i].len <= ctx->match_window_end - ctx->match_window_pos);
-               LZX_ASSERT(matches[i].offset > 0);
-               LZX_ASSERT(matches[i].offset <= ctx->match_window_pos);
-               LZX_ASSERT(!memcmp(&ctx->window[ctx->match_window_pos],
-                                  &ctx->window[ctx->match_window_pos - matches[i].offset],
-                                  matches[i].len));
-               if (i) {
-                       LZX_ASSERT(matches[i].len > matches[i - 1].len);
-                       LZX_ASSERT(matches[i].offset > matches[i - 1].offset);
-               }
+       cache_ptr = c->cache_ptr;
+       matches = cache_ptr + 1;
+       if (likely(cache_ptr <= c->cache_limit)) {
+               num_matches = lz_mf_get_matches(c->mf, matches);
+               cache_ptr->len = num_matches;
+               c->cache_ptr = matches + num_matches;
+       } else {
+               num_matches = 0;
         }
-#endif
-       ctx->match_window_pos++;
-       ctx->cache_ptr = matches + num_matches;
+       c->match_window_pos++;
         *matches_ret = matches;
         return num_matches;
  }
  
-static void
-lzx_skip_bytes(struct lzx_compressor *ctx, unsigned n)
+static unsigned
+lzx_get_matches_fillcache_multiblock(struct lzx_compressor *c,
+                                    const struct lz_match **matches_ret)
  {
         struct lz_match *cache_ptr;
+       struct lz_match *matches;
+       unsigned num_matches;
+
+       cache_ptr = c->cache_ptr;
+       matches = cache_ptr + 1;
+       if (likely(cache_ptr <= c->cache_limit)) {
+               num_matches = lz_mf_get_matches(c->mf, matches);
+               num_matches = maybe_truncate_matches(matches, num_matches, c);
+               cache_ptr->len = num_matches;
+               c->cache_ptr = matches + num_matches;
+       } else {
+               num_matches = 0;
+       }
+       c->match_window_pos++;
+       *matches_ret = matches;
+       return num_matches;
+}
  
-       LZX_ASSERT(n <= ctx->match_window_end - ctx->match_window_pos);
+static unsigned
+lzx_get_matches_usecache(struct lzx_compressor *c,
+                        const struct lz_match **matches_ret)
+{
+       struct lz_match *cache_ptr;
+       struct lz_match *matches;
+       unsigned num_matches;
  
-       cache_ptr = ctx->cache_ptr;
-       ctx->match_window_pos += n;
-       if (ctx->matches_cached) {
-               while (n-- && cache_ptr <= ctx->cache_limit)
-                       cache_ptr += 1 + cache_ptr->len;
+       cache_ptr = c->cache_ptr;
+       matches = cache_ptr + 1;
+       if (cache_ptr <= c->cache_limit) {
+               num_matches = cache_ptr->len;
+               c->cache_ptr = matches + num_matches;
         } else {
-               lz_bt_skip_positions(&ctx->mf, n);
-               while (n-- && cache_ptr <= ctx->cache_limit) {
+               num_matches = 0;
+       }
+       c->match_window_pos++;
+       *matches_ret = matches;
+       return num_matches;
+}
+
+static unsigned
+lzx_get_matches_usecache_nocheck(struct lzx_compressor *c,
+                                const struct lz_match **matches_ret)
+{
+       struct lz_match *cache_ptr;
+       struct lz_match *matches;
+       unsigned num_matches;
+
+       cache_ptr = c->cache_ptr;
+       matches = cache_ptr + 1;
+       num_matches = cache_ptr->len;
+       c->cache_ptr = matches + num_matches;
+       c->match_window_pos++;
+       *matches_ret = matches;
+       return num_matches;
+}
+
+static unsigned
+lzx_get_matches_nocache_singleblock(struct lzx_compressor *c,
+                                   const struct lz_match **matches_ret)
+{
+       struct lz_match *matches;
+       unsigned num_matches;
+
+       matches = c->cache_ptr;
+       num_matches = lz_mf_get_matches(c->mf, matches);
+       c->match_window_pos++;
+       *matches_ret = matches;
+       return num_matches;
+}
+
+static unsigned
+lzx_get_matches_nocache_multiblock(struct lzx_compressor *c,
+                                  const struct lz_match **matches_ret)
+{
+       struct lz_match *matches;
+       unsigned num_matches;
+
+       matches = c->cache_ptr;
+       num_matches = lz_mf_get_matches(c->mf, matches);
+       num_matches = maybe_truncate_matches(matches, num_matches, c);
+       c->match_window_pos++;
+       *matches_ret = matches;
+       return num_matches;
+}
+
+/*
+ * Find matches at the next position in the window.
+ *
+ * Returns the number of matches found and sets *matches_ret to point to the
+ * matches array.  The matches will be sorted by strictly increasing length and
+ * offset.
+ */
+static inline unsigned
+lzx_get_matches(struct lzx_compressor *c,
+               const struct lz_match **matches_ret)
+{
+       return (*c->get_matches_func)(c, matches_ret);
+}
+
+static void
+lzx_skip_bytes_fillcache(struct lzx_compressor *c, unsigned n)
+{
+       struct lz_match *cache_ptr;
+
+       cache_ptr = c->cache_ptr;
+       c->match_window_pos += n;
+       lz_mf_skip_positions(c->mf, n);
+       if (cache_ptr <= c->cache_limit) {
+               do {
                         cache_ptr->len = 0;
                         cache_ptr += 1;
-               }
+               } while (--n && cache_ptr <= c->cache_limit);
+       }
+       c->cache_ptr = cache_ptr;
+}
+
+static void
+lzx_skip_bytes_usecache(struct lzx_compressor *c, unsigned n)
+{
+       struct lz_match *cache_ptr;
+
+       cache_ptr = c->cache_ptr;
+       c->match_window_pos += n;
+       if (cache_ptr <= c->cache_limit) {
+               do {
+                       cache_ptr += 1 + cache_ptr->len;
+               } while (--n && cache_ptr <= c->cache_limit);
         }
-       ctx->cache_ptr = cache_ptr;
+       c->cache_ptr = cache_ptr;
+}
+
+static void
+lzx_skip_bytes_usecache_nocheck(struct lzx_compressor *c, unsigned n)
+{
+       struct lz_match *cache_ptr;
+
+       cache_ptr = c->cache_ptr;
+       c->match_window_pos += n;
+       do {
+               cache_ptr += 1 + cache_ptr->len;
+       } while (--n);
+       c->cache_ptr = cache_ptr;
+}
+
+static void
+lzx_skip_bytes_nocache(struct lzx_compressor *c, unsigned n)
+{
+       c->match_window_pos += n;
+       lz_mf_skip_positions(c->mf, n);
+}
+
+/*
+ * Skip the specified number of positions in the window (don't search for
+ * matches at them).
+ */
+static inline void
+lzx_skip_bytes(struct lzx_compressor *c, unsigned n)
+{
+       return (*c->skip_bytes_func)(c, n);
  }
  
  /*
@@ -1345,39 +1350,39 @@ lzx_skip_bytes(struct lzx_compressor *ctx, unsigned n)
   * Returns the first match in the list.
   */
  static struct lz_match
-lzx_match_chooser_reverse_list(struct lzx_compressor *ctx, unsigned cur_pos)
+lzx_match_chooser_reverse_list(struct lzx_compressor *c, unsigned cur_pos)
  {
         unsigned prev_link, saved_prev_link;
         unsigned prev_match_offset, saved_prev_match_offset;
  
-       ctx->optimum_end_idx = cur_pos;
+       c->optimum_end_idx = cur_pos;
  
-       saved_prev_link = ctx->optimum[cur_pos].prev.link;
-       saved_prev_match_offset = ctx->optimum[cur_pos].prev.match_offset;
+       saved_prev_link = c->optimum[cur_pos].prev.link;
+       saved_prev_match_offset = c->optimum[cur_pos].prev.match_offset;
  
         do {
                 prev_link = saved_prev_link;
                 prev_match_offset = saved_prev_match_offset;
  
-               saved_prev_link = ctx->optimum[prev_link].prev.link;
-               saved_prev_match_offset = ctx->optimum[prev_link].prev.match_offset;
+               saved_prev_link = c->optimum[prev_link].prev.link;
+               saved_prev_match_offset = c->optimum[prev_link].prev.match_offset;
  
-               ctx->optimum[prev_link].next.link = cur_pos;
-               ctx->optimum[prev_link].next.match_offset = prev_match_offset;
+               c->optimum[prev_link].next.link = cur_pos;
+               c->optimum[prev_link].next.match_offset = prev_match_offset;
  
                 cur_pos = prev_link;
         } while (cur_pos != 0);
  
-       ctx->optimum_cur_idx = ctx->optimum[0].next.link;
+       c->optimum_cur_idx = c->optimum[0].next.link;
  
         return (struct lz_match)
-               { .len = ctx->optimum_cur_idx,
-                 .offset = ctx->optimum[0].next.match_offset,
+               { .len = c->optimum_cur_idx,
+                 .offset = c->optimum[0].next.match_offset,
                 };
  }
  
  /*
- * lzx_get_near_optimal_match() -
+ * lzx_choose_near_optimal_item() -
   *
   * Choose an approximately optimal match or literal to use at the next position
   * in the string, or "window", being LZ-encoded.
@@ -1439,7 +1444,7 @@ lzx_match_chooser_reverse_list(struct lzx_compressor *ctx, unsigned cur_pos)
   * chosen.  For literals, the length is 0 or 1 and the offset is meaningless.
   */
  static struct lz_match
-lzx_get_near_optimal_match(struct lzx_compressor *ctx)
+lzx_choose_near_optimal_item(struct lzx_compressor *c)
  {
         unsigned num_matches;
         const struct lz_match *matches;
@@ -1450,30 +1455,30 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
         unsigned cur_pos;
         unsigned end_pos;
  
-       if (ctx->optimum_cur_idx != ctx->optimum_end_idx) {
+       if (c->optimum_cur_idx != c->optimum_end_idx) {
                 /* Case 2: Return the next match/literal already found.  */
-               match.len = ctx->optimum[ctx->optimum_cur_idx].next.link -
-                                   ctx->optimum_cur_idx;
-               match.offset = ctx->optimum[ctx->optimum_cur_idx].next.match_offset;
+               match.len = c->optimum[c->optimum_cur_idx].next.link -
+                                   c->optimum_cur_idx;
+               match.offset = c->optimum[c->optimum_cur_idx].next.match_offset;
  
-               ctx->optimum_cur_idx = ctx->optimum[ctx->optimum_cur_idx].next.link;
+               c->optimum_cur_idx = c->optimum[c->optimum_cur_idx].next.link;
                 return match;
         }
  
         /* Case 1:  Compute a new list of matches/literals to return.  */
  
-       ctx->optimum_cur_idx = 0;
-       ctx->optimum_end_idx = 0;
+       c->optimum_cur_idx = 0;
+       c->optimum_end_idx = 0;
  
         /* Search for matches at recent offsets.  Only keep the one with the
          * longest match length.  */
         longest_rep_len = LZX_MIN_MATCH_LEN - 1;
-       if (ctx->match_window_pos >= 1) {
+       if (c->match_window_pos >= 1) {
                 unsigned limit = min(LZX_MAX_MATCH_LEN,
-                                    ctx->match_window_end - ctx->match_window_pos);
+                                    c->match_window_end - c->match_window_pos);
                 for (int i = 0; i < LZX_NUM_RECENT_OFFSETS; i++) {
-                       u32 offset = ctx->queue.R[i];
-                       const u8 *strptr = &ctx->window[ctx->match_window_pos];
+                       u32 offset = c->queue.R[i];
+                       const u8 *strptr = &c->cur_window[c->match_window_pos];
                         const u8 *matchptr = strptr - offset;
                         unsigned len = 0;
                         while (len < limit && strptr[len] == matchptr[len])
@@ -1486,8 +1491,8 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
         }
  
         /* If there's a long match with a recent offset, take it.  */
-       if (longest_rep_len >= ctx->params.alg_params.slow.nice_match_length) {
-               lzx_skip_bytes(ctx, longest_rep_len);
+       if (longest_rep_len >= c->params.nice_match_length) {
+               lzx_skip_bytes(c, longest_rep_len);
                 return (struct lz_match) {
                         .len = longest_rep_len,
                         .offset = longest_rep_offset,
@@ -1495,13 +1500,13 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
         }
  
         /* Search other matches.  */
-       num_matches = lzx_get_matches(ctx, &matches);
+       num_matches = lzx_get_matches(c, &matches);
  
         /* If there's a long match, take it.  */
         if (num_matches) {
                 longest_len = matches[num_matches - 1].len;
-               if (longest_len >= ctx->params.alg_params.slow.nice_match_length) {
-                       lzx_skip_bytes(ctx, longest_len - 1);
+               if (longest_len >= c->params.nice_match_length) {
+                       lzx_skip_bytes(c, longest_len - 1);
                         return matches[num_matches - 1];
                 }
         } else {
@@ -1510,10 +1515,10 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
  
         /* Calculate the cost to reach the next position by coding a literal.
          */
-       ctx->optimum[1].queue = ctx->queue;
-       ctx->optimum[1].cost = lzx_literal_cost(ctx->window[ctx->match_window_pos - 1],
-                                               &ctx->costs);
-       ctx->optimum[1].prev.link = 0;
+       c->optimum[1].queue = c->queue;
+       c->optimum[1].cost = lzx_literal_cost(c->cur_window[c->match_window_pos - 1],
+                                             &c->costs);
+       c->optimum[1].prev.link = 0;
  
         /* Calculate the cost to reach any position up to and including that
          * reached by the longest match.
@@ -1534,14 +1539,14 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
                 unsigned num_extra_bits;
  
                 offset = matches[i].offset;
-               queue = ctx->queue;
+               queue = c->queue;
                 position_cost = 0;
  
                 position_slot = lzx_get_position_slot(offset, &queue);
                 num_extra_bits = lzx_get_num_extra_bits(position_slot);
                 if (num_extra_bits >= 3) {
                         position_cost += num_extra_bits - 3;
-                       position_cost += ctx->costs.aligned[(offset + LZX_OFFSET_OFFSET) & 7];
+                       position_cost += c->costs.aligned[(offset + LZX_OFFSET_OFFSET) & 7];
                 } else {
                         position_cost += num_extra_bits;
                 }
@@ -1555,14 +1560,14 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
  
                         len_header = min(len - LZX_MIN_MATCH_LEN, LZX_NUM_PRIMARY_LENS);
                         main_symbol = ((position_slot << 3) | len_header) + LZX_NUM_CHARS;
-                       cost += ctx->costs.main[main_symbol];
+                       cost += c->costs.main[main_symbol];
                         if (len_header == LZX_NUM_PRIMARY_LENS)
-                               cost += ctx->costs.len[len - LZX_MIN_MATCH_LEN - LZX_NUM_PRIMARY_LENS];
+                               cost += c->costs.len[len - LZX_MIN_MATCH_LEN - LZX_NUM_PRIMARY_LENS];
  
-                       ctx->optimum[len].queue = queue;
-                       ctx->optimum[len].prev.link = 0;
-                       ctx->optimum[len].prev.match_offset = offset;
-                       ctx->optimum[len].cost = cost;
+                       c->optimum[len].queue = queue;
+                       c->optimum[len].prev.link = 0;
+                       c->optimum[len].prev.match_offset = offset;
+                       c->optimum[len].cost = cost;
                 } while (++len <= matches[i].len);
         }
         end_pos = longest_len;
@@ -1572,16 +1577,16 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
                 u32 cost;
  
                 while (end_pos < longest_rep_len)
-                       ctx->optimum[++end_pos].cost = MC_INFINITE_COST;
+                       c->optimum[++end_pos].cost = MC_INFINITE_COST;
  
-               queue = ctx->queue;
+               queue = c->queue;
                 cost = lzx_match_cost(longest_rep_len, longest_rep_offset,
-                                     &ctx->costs, &queue);
-               if (cost <= ctx->optimum[longest_rep_len].cost) {
-                       ctx->optimum[longest_rep_len].queue = queue;
-                       ctx->optimum[longest_rep_len].prev.link = 0;
-                       ctx->optimum[longest_rep_len].prev.match_offset = longest_rep_offset;
-                       ctx->optimum[longest_rep_len].cost = cost;
+                                     &c->costs, &queue);
+               if (cost <= c->optimum[longest_rep_len].cost) {
+                       c->optimum[longest_rep_len].queue = queue;
+                       c->optimum[longest_rep_len].prev.link = 0;
+                       c->optimum[longest_rep_len].prev.match_offset = longest_rep_offset;
+                       c->optimum[longest_rep_len].cost = cost;
                 }
         }
  
@@ -1589,7 +1594,7 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
          * position.  The algorithm may find multiple paths to reach each
          * position; only the lowest-cost path is saved.
          *
-        * The progress of the parse is tracked in the @ctx->optimum array, which
+        * The progress of the parse is tracked in the @c->optimum array, which
          * for each position contains the minimum cost to reach that position,
          * the index of the start of the match/literal taken to reach that
          * position through the minimum-cost path, the offset of the match taken
@@ -1615,7 +1620,7 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
          *    match/literal list.
          *
          * 3. Failing either of the above in a degenerate case, the loop
-        *    terminates when space in the @ctx->optimum array is exhausted.
+        *    terminates when space in the @c->optimum array is exhausted.
          *    This terminates the algorithm and forces it to start returning
          *    matches/literals even though they may not be globally optimal.
          *
@@ -1634,16 +1639,16 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
                 cur_pos++;
  
                 /* Check termination conditions (2) and (3) noted above.  */
-               if (cur_pos == end_pos || cur_pos == LZX_OPTIM_ARRAY_SIZE)
-                       return lzx_match_chooser_reverse_list(ctx, cur_pos);
+               if (cur_pos == end_pos || cur_pos == LZX_OPTIM_ARRAY_LENGTH)
+                       return lzx_match_chooser_reverse_list(c, cur_pos);
  
                 /* Search for matches at recent offsets.  */
                 longest_rep_len = LZX_MIN_MATCH_LEN - 1;
                 unsigned limit = min(LZX_MAX_MATCH_LEN,
-                                    ctx->match_window_end - ctx->match_window_pos);
+                                    c->match_window_end - c->match_window_pos);
                 for (int i = 0; i < LZX_NUM_RECENT_OFFSETS; i++) {
-                       u32 offset = ctx->optimum[cur_pos].queue.R[i];
-                       const u8 *strptr = &ctx->window[ctx->match_window_pos];
+                       u32 offset = c->optimum[cur_pos].queue.R[i];
+                       const u8 *strptr = &c->cur_window[c->match_window_pos];
                         const u8 *matchptr = strptr - offset;
                         unsigned len = 0;
                         while (len < limit && strptr[len] == matchptr[len])
@@ -1656,42 +1661,42 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
  
                 /* If we found a long match at a recent offset, choose it
                  * immediately.  */
-               if (longest_rep_len >= ctx->params.alg_params.slow.nice_match_length) {
+               if (longest_rep_len >= c->params.nice_match_length) {
                         /* Build the list of matches to return and get
                          * the first one.  */
-                       match = lzx_match_chooser_reverse_list(ctx, cur_pos);
+                       match = lzx_match_chooser_reverse_list(c, cur_pos);
  
                         /* Append the long match to the end of the list.  */
-                       ctx->optimum[cur_pos].next.match_offset = longest_rep_offset;
-                       ctx->optimum[cur_pos].next.link = cur_pos + longest_rep_len;
-                       ctx->optimum_end_idx = cur_pos + longest_rep_len;
+                       c->optimum[cur_pos].next.match_offset = longest_rep_offset;
+                       c->optimum[cur_pos].next.link = cur_pos + longest_rep_len;
+                       c->optimum_end_idx = cur_pos + longest_rep_len;
  
                         /* Skip over the remaining bytes of the long match.  */
-                       lzx_skip_bytes(ctx, longest_rep_len);
+                       lzx_skip_bytes(c, longest_rep_len);
  
                         /* Return first match in the list.  */
                         return match;
                 }
  
                 /* Search other matches.  */
-               num_matches = lzx_get_matches(ctx, &matches);
+               num_matches = lzx_get_matches(c, &matches);
  
                 /* If there's a long match, take it.  */
                 if (num_matches) {
                         longest_len = matches[num_matches - 1].len;
-                       if (longest_len >= ctx->params.alg_params.slow.nice_match_length) {
+                       if (longest_len >= c->params.nice_match_length) {
                                 /* Build the list of matches to return and get
                                  * the first one.  */
-                               match = lzx_match_chooser_reverse_list(ctx, cur_pos);
+                               match = lzx_match_chooser_reverse_list(c, cur_pos);
  
                                 /* Append the long match to the end of the list.  */
-                               ctx->optimum[cur_pos].next.match_offset =
+                               c->optimum[cur_pos].next.match_offset =
                                         matches[num_matches - 1].offset;
-                               ctx->optimum[cur_pos].next.link = cur_pos + longest_len;
-                               ctx->optimum_end_idx = cur_pos + longest_len;
+                               c->optimum[cur_pos].next.link = cur_pos + longest_len;
+                               c->optimum_end_idx = cur_pos + longest_len;
  
                                 /* Skip over the remaining bytes of the long match.  */
-                               lzx_skip_bytes(ctx, longest_len - 1);
+                               lzx_skip_bytes(c, longest_len - 1);
  
                                 /* Return first match in the list.  */
                                 return match;
@@ -1701,16 +1706,16 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
                 }
  
                 while (end_pos < cur_pos + longest_len)
-                       ctx->optimum[++end_pos].cost = MC_INFINITE_COST;
+                       c->optimum[++end_pos].cost = MC_INFINITE_COST;
  
                 /* Consider coding a literal.  */
-               cost = ctx->optimum[cur_pos].cost +
-                       lzx_literal_cost(ctx->window[ctx->match_window_pos - 1],
-                                        &ctx->costs);
-               if (cost < ctx->optimum[cur_pos + 1].cost) {
-                       ctx->optimum[cur_pos + 1].queue = ctx->optimum[cur_pos].queue;
-                       ctx->optimum[cur_pos + 1].cost = cost;
-                       ctx->optimum[cur_pos + 1].prev.link = cur_pos;
+               cost = c->optimum[cur_pos].cost +
+                       lzx_literal_cost(c->cur_window[c->match_window_pos - 1],
+                                        &c->costs);
+               if (cost < c->optimum[cur_pos + 1].cost) {
+                       c->optimum[cur_pos + 1].queue = c->optimum[cur_pos].queue;
+                       c->optimum[cur_pos + 1].cost = cost;
+                       c->optimum[cur_pos + 1].prev.link = cur_pos;
                 }
  
                 /* Consider coding a match.
@@ -1729,14 +1734,14 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
                         unsigned num_extra_bits;
  
                         offset = matches[i].offset;
-                       queue = ctx->optimum[cur_pos].queue;
-                       position_cost = ctx->optimum[cur_pos].cost;
+                       queue = c->optimum[cur_pos].queue;
+                       position_cost = c->optimum[cur_pos].cost;
  
                         position_slot = lzx_get_position_slot(offset, &queue);
                         num_extra_bits = lzx_get_num_extra_bits(position_slot);
                         if (num_extra_bits >= 3) {
                                 position_cost += num_extra_bits - 3;
-                               position_cost += ctx->costs.aligned[
+                               position_cost += c->costs.aligned[
                                                 (offset + LZX_OFFSET_OFFSET) & 7];
                         } else {
                                 position_cost += num_extra_bits;
@@ -1753,17 +1758,17 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
                                                  LZX_NUM_PRIMARY_LENS);
                                 main_symbol = ((position_slot << 3) | len_header) +
                                                 LZX_NUM_CHARS;
-                               cost += ctx->costs.main[main_symbol];
+                               cost += c->costs.main[main_symbol];
                                 if (len_header == LZX_NUM_PRIMARY_LENS) {
-                                       cost += ctx->costs.len[len -
+                                       cost += c->costs.len[len -
                                                         LZX_MIN_MATCH_LEN -
                                                         LZX_NUM_PRIMARY_LENS];
                                 }
-                               if (cost < ctx->optimum[cur_pos + len].cost) {
-                                       ctx->optimum[cur_pos + len].queue = queue;
-                                       ctx->optimum[cur_pos + len].prev.link = cur_pos;
-                                       ctx->optimum[cur_pos + len].prev.match_offset = offset;
-                                       ctx->optimum[cur_pos + len].cost = cost;
+                               if (cost < c->optimum[cur_pos + len].cost) {
+                                       c->optimum[cur_pos + len].queue = queue;
+                                       c->optimum[cur_pos + len].prev.link = cur_pos;
+                                       c->optimum[cur_pos + len].prev.match_offset = offset;
+                                       c->optimum[cur_pos + len].cost = cost;
                                 }
                         } while (++len <= matches[i].len);
                 }
@@ -1772,27 +1777,90 @@ lzx_get_near_optimal_match(struct lzx_compressor *ctx)
                         struct lzx_lru_queue queue;
  
                         while (end_pos < cur_pos + longest_rep_len)
-                               ctx->optimum[++end_pos].cost = MC_INFINITE_COST;
+                               c->optimum[++end_pos].cost = MC_INFINITE_COST;
  
-                       queue = ctx->optimum[cur_pos].queue;
+                       queue = c->optimum[cur_pos].queue;
  
-                       cost = ctx->optimum[cur_pos].cost +
+                       cost = c->optimum[cur_pos].cost +
                                 lzx_match_cost(longest_rep_len, longest_rep_offset,
-                                              &ctx->costs, &queue);
-                       if (cost <= ctx->optimum[cur_pos + longest_rep_len].cost) {
-                               ctx->optimum[cur_pos + longest_rep_len].queue =
+                                              &c->costs, &queue);
+                       if (cost <= c->optimum[cur_pos + longest_rep_len].cost) {
+                               c->optimum[cur_pos + longest_rep_len].queue =
                                         queue;
-                               ctx->optimum[cur_pos + longest_rep_len].prev.link =
+                               c->optimum[cur_pos + longest_rep_len].prev.link =
                                         cur_pos;
-                               ctx->optimum[cur_pos + longest_rep_len].prev.match_offset =
+                               c->optimum[cur_pos + longest_rep_len].prev.match_offset =
                                         longest_rep_offset;
-                               ctx->optimum[cur_pos + longest_rep_len].cost =
+                               c->optimum[cur_pos + longest_rep_len].cost =
                                         cost;
                         }
                 }
         }
  }
  
+static struct lz_match
+lzx_choose_lazy_item(struct lzx_compressor *c)
+{
+       const struct lz_match *matches;
+       struct lz_match cur_match;
+       struct lz_match next_match;
+       u32 num_matches;
+
+       if (c->prev_match.len) {
+               cur_match = c->prev_match;
+               c->prev_match.len = 0;
+       } else {
+               num_matches = lzx_get_matches(c, &matches);
+               if (num_matches == 0 ||
+                   (matches[num_matches - 1].len <= 3 &&
+                    (matches[num_matches - 1].len <= 2 ||
+                     matches[num_matches - 1].offset > 4096)))
+               {
+                       return (struct lz_match) { };
+               }
+
+               cur_match = matches[num_matches - 1];
+       }
+
+       if (cur_match.len >= c->params.nice_match_length) {
+               lzx_skip_bytes(c, cur_match.len - 1);
+               return cur_match;
+       }
+
+       num_matches = lzx_get_matches(c, &matches);
+       if (num_matches == 0 ||
+           (matches[num_matches - 1].len <= 3 &&
+            (matches[num_matches - 1].len <= 2 ||
+             matches[num_matches - 1].offset > 4096)))
+       {
+               lzx_skip_bytes(c, cur_match.len - 2);
+               return cur_match;
+       }
+
+       next_match = matches[num_matches - 1];
+
+       if (next_match.len <= cur_match.len) {
+               lzx_skip_bytes(c, cur_match.len - 2);
+               return cur_match;
+       } else {
+               c->prev_match = next_match;
+               return (struct lz_match) { };
+       }
+}
+
+/*
+ * Return the next match or literal to use, delegating to the currently selected
+ * match-choosing algorithm.
+ *
+ * If the length of the returned 'struct lz_match' is less than
+ * LZX_MIN_MATCH_LEN, then it is really a literal.
+ */
+static inline struct lz_match
+lzx_choose_item(struct lzx_compressor *c)
+{
+       return (*c->params.choose_item_func)(c);
+}
+
  /* Set default symbol costs for the LZX Huffman codes.  */
  static void
  lzx_set_default_costs(struct lzx_costs * costs, unsigned num_main_syms)
@@ -1842,530 +1910,372 @@ lzx_choose_verbatim_or_aligned(const struct lzx_freqs * freqs,
                 return LZX_BLOCKTYPE_VERBATIM;
  }
  
-/* Find a near-optimal sequence of matches/literals with which to output the
- * specified LZX block, then set the block's type to that which has the minimum
- * cost to output (either verbatim or aligned).  */
+/* Find a sequence of matches/literals with which to output the specified LZX
+ * block, then set the block's type to that which has the minimum cost to output
+ * (either verbatim or aligned).  */
  static void
-lzx_optimize_block(struct lzx_compressor *ctx, struct lzx_block_spec *spec,
-                  unsigned num_passes)
+lzx_choose_items_for_block(struct lzx_compressor *c, struct lzx_block_spec *spec)
  {
-       const struct lzx_lru_queue orig_queue = ctx->queue;
-       unsigned num_passes_remaining = num_passes;
+       const struct lzx_lru_queue orig_queue = c->queue;
+       u32 num_passes_remaining = c->params.num_optim_passes;
         struct lzx_freqs freqs;
         const u8 *window_ptr;
         const u8 *window_end;
-       struct lzx_item *next_chosen_match;
+       struct lzx_item *next_chosen_item;
         struct lz_match lz_match;
         struct lzx_item lzx_item;
  
         LZX_ASSERT(num_passes >= 1);
-       LZX_ASSERT(lz_bt_get_position(&ctx->mf) == spec->window_pos);
+       LZX_ASSERT(lz_mf_get_position(c->mf) == spec->window_pos);
  
-       ctx->match_window_end = spec->window_pos + spec->block_size;
-       ctx->matches_cached = false;
+       c->match_window_end = spec->window_pos + spec->block_size;
+
+       if (c->params.num_optim_passes > 1) {
+               if (spec->block_size == c->cur_window_size)
+                       c->get_matches_func = lzx_get_matches_fillcache_singleblock;
+               else
+                       c->get_matches_func = lzx_get_matches_fillcache_multiblock;
+               c->skip_bytes_func = lzx_skip_bytes_fillcache;
+       } else {
+               if (spec->block_size == c->cur_window_size)
+                       c->get_matches_func = lzx_get_matches_nocache_singleblock;
+               else
+                       c->get_matches_func = lzx_get_matches_nocache_multiblock;
+               c->skip_bytes_func = lzx_skip_bytes_nocache;
+       }
  
         /* The first optimal parsing pass is done using the cost model already
-        * set in ctx->costs.  Each later pass is done using a cost model
+        * set in c->costs.  Each later pass is done using a cost model
          * computed from the previous pass.
          *
          * To improve performance we only generate the array containing the
          * matches and literals in intermediate form on the final pass.  */
  
         while (--num_passes_remaining) {
-               ctx->match_window_pos = spec->window_pos;
-               ctx->cache_ptr = ctx->cached_matches;
+               c->match_window_pos = spec->window_pos;
+               c->cache_ptr = c->cached_matches;
                 memset(&freqs, 0, sizeof(freqs));
-               window_ptr = &ctx->window[spec->window_pos];
+               window_ptr = &c->cur_window[spec->window_pos];
                 window_end = window_ptr + spec->block_size;
  
                 while (window_ptr != window_end) {
  
-                       lz_match = lzx_get_near_optimal_match(ctx);
+                       lz_match = lzx_choose_item(c);
  
                         LZX_ASSERT(!(lz_match.len == LZX_MIN_MATCH_LEN &&
-                                    lz_match.offset == ctx->max_window_size -
+                                    lz_match.offset == c->max_window_size -
                                                          LZX_MIN_MATCH_LEN));
                         if (lz_match.len >= LZX_MIN_MATCH_LEN) {
                                 lzx_tally_match(lz_match.len, lz_match.offset,
-                                               &freqs, &ctx->queue);
+                                               &freqs, &c->queue);
                                 window_ptr += lz_match.len;
                         } else {
                                 lzx_tally_literal(*window_ptr, &freqs);
                                 window_ptr += 1;
                         }
                 }
-               lzx_make_huffman_codes(&freqs, &spec->codes, ctx->num_main_syms);
-               lzx_set_costs(ctx, &spec->codes.lens);
-               ctx->queue = orig_queue;
-               ctx->matches_cached = true;
+               lzx_make_huffman_codes(&freqs, &spec->codes, c->num_main_syms);
+               lzx_set_costs(c, &spec->codes.lens, 15);
+               c->queue = orig_queue;
+               if (c->cache_ptr <= c->cache_limit) {
+                       c->get_matches_func = lzx_get_matches_usecache_nocheck;
+                       c->skip_bytes_func = lzx_skip_bytes_usecache_nocheck;
+               } else {
+                       c->get_matches_func = lzx_get_matches_usecache;
+                       c->skip_bytes_func = lzx_skip_bytes_usecache;
+               }
         }
  
-       ctx->match_window_pos = spec->window_pos;
-       ctx->cache_ptr = ctx->cached_matches;
+       c->match_window_pos = spec->window_pos;
+       c->cache_ptr = c->cached_matches;
         memset(&freqs, 0, sizeof(freqs));
-       window_ptr = &ctx->window[spec->window_pos];
+       window_ptr = &c->cur_window[spec->window_pos];
         window_end = window_ptr + spec->block_size;
  
-       spec->chosen_items = &ctx->chosen_items[spec->window_pos];
-       next_chosen_match = spec->chosen_items;
+       spec->chosen_items = &c->chosen_items[spec->window_pos];
+       next_chosen_item = spec->chosen_items;
  
+       unsigned unseen_cost = 9;
         while (window_ptr != window_end) {
-               lz_match = lzx_get_near_optimal_match(ctx);
+
+               lz_match = lzx_choose_item(c);
  
                 LZX_ASSERT(!(lz_match.len == LZX_MIN_MATCH_LEN &&
-                            lz_match.offset == ctx->max_window_size -
+                            lz_match.offset == c->max_window_size -
                                                  LZX_MIN_MATCH_LEN));
                 if (lz_match.len >= LZX_MIN_MATCH_LEN) {
                         lzx_item.data = lzx_tally_match(lz_match.len,
                                                          lz_match.offset,
-                                                        &freqs, &ctx->queue);
+                                                        &freqs, &c->queue);
                         window_ptr += lz_match.len;
                 } else {
                         lzx_item.data = lzx_tally_literal(*window_ptr, &freqs);
                         window_ptr += 1;
                 }
-               *next_chosen_match++ = lzx_item;
+               *next_chosen_item++ = lzx_item;
+
+               /* When doing one-pass "near-optimal" parsing, update the cost
+                * model occasionally.  */
+               if (unlikely((next_chosen_item - spec->chosen_items) % 2048 == 0) &&
+                   c->params.choose_item_func == lzx_choose_near_optimal_item &&
+                   c->params.num_optim_passes == 1)
+               {
+                       lzx_make_huffman_codes(&freqs, &spec->codes, c->num_main_syms);
+                       lzx_set_costs(c, &spec->codes.lens, unseen_cost);
+                       if (unseen_cost < 15)
+                               unseen_cost++;
+               }
         }
-       spec->num_chosen_items = next_chosen_match - spec->chosen_items;
-       lzx_make_huffman_codes(&freqs, &spec->codes, ctx->num_main_syms);
+       spec->num_chosen_items = next_chosen_item - spec->chosen_items;
+       lzx_make_huffman_codes(&freqs, &spec->codes, c->num_main_syms);
         spec->block_type = lzx_choose_verbatim_or_aligned(&freqs, &spec->codes);
  }
  
  /* Prepare the input window into one or more LZX blocks ready to be output.  */
  static void
-lzx_prepare_blocks(struct lzx_compressor * ctx)
+lzx_prepare_blocks(struct lzx_compressor *c)
  {
         /* Set up a default cost model.  */
-       lzx_set_default_costs(&ctx->costs, ctx->num_main_syms);
+       if (c->params.choose_item_func == lzx_choose_near_optimal_item)
+               lzx_set_default_costs(&c->costs, c->num_main_syms);
  
         /* Set up the block specifications.
          * TODO: The compression ratio could be slightly improved by performing
          * data-dependent block splitting instead of using fixed-size blocks.
          * Doing so well is a computationally hard problem, however.  */
-       ctx->num_blocks = DIV_ROUND_UP(ctx->window_size, LZX_DIV_BLOCK_SIZE);
-       for (unsigned i = 0; i < ctx->num_blocks; i++) {
-               unsigned pos = LZX_DIV_BLOCK_SIZE * i;
-               ctx->block_specs[i].window_pos = pos;
-               ctx->block_specs[i].block_size = min(ctx->window_size - pos,
-                                                    LZX_DIV_BLOCK_SIZE);
+       c->num_blocks = DIV_ROUND_UP(c->cur_window_size, LZX_DIV_BLOCK_SIZE);
+       for (unsigned i = 0; i < c->num_blocks; i++) {
+               u32 pos = LZX_DIV_BLOCK_SIZE * i;
+               c->block_specs[i].window_pos = pos;
+               c->block_specs[i].block_size = min(c->cur_window_size - pos,
+                                                  LZX_DIV_BLOCK_SIZE);
         }
  
         /* Load the window into the match-finder.  */
-       lz_bt_load_window(&ctx->mf, ctx->window, ctx->window_size);
+       lz_mf_load_window(c->mf, c->cur_window, c->cur_window_size);
  
         /* Determine sequence of matches/literals to output for each block.  */
-       lzx_lru_queue_init(&ctx->queue);
-       ctx->optimum_cur_idx = 0;
-       ctx->optimum_end_idx = 0;
-       for (unsigned i = 0; i < ctx->num_blocks; i++) {
-               lzx_optimize_block(ctx, &ctx->block_specs[i],
-                                  ctx->params.alg_params.slow.num_optim_passes);
-       }
+       lzx_lru_queue_init(&c->queue);
+       c->optimum_cur_idx = 0;
+       c->optimum_end_idx = 0;
+       c->prev_match.len = 0;
+       for (unsigned i = 0; i < c->num_blocks; i++)
+               lzx_choose_items_for_block(c, &c->block_specs[i]);
  }
  
-/*
- * This is the fast version of lzx_prepare_blocks().  This version "quickly"
- * prepares a single compressed block containing the entire input.  See the
- * description of the "Fast algorithm" at the beginning of this file for more
- * information.
- *
- * Input ---  the preprocessed data:
- *
- *     ctx->window[]
- *     ctx->window_size
- *
- * Output --- the block specification and the corresponding match/literal data:
- *
- *     ctx->block_specs[]
- *     ctx->num_blocks
- *     ctx->chosen_items[]
- */
  static void
-lzx_prepare_block_fast(struct lzx_compressor * ctx)
+lzx_build_params(unsigned int compression_level,
+                u32 max_window_size,
+                struct lzx_compressor_params *lzx_params)
  {
-       struct lzx_record_ctx record_ctx;
-       struct lzx_block_spec *spec;
-
-       /* Parameters to hash chain LZ match finder
-        * (lazy with 1 match lookahead)  */
-       static const struct lz_params lzx_lz_params = {
-               /* Although LZX_MIN_MATCH_LEN == 2, length 2 matches typically
-                * aren't worth choosing when using greedy or lazy parsing.  */
-               .min_match      = 3,
-               .max_match      = LZX_MAX_MATCH_LEN,
-               .max_offset     = LZX_MAX_WINDOW_SIZE,
-               .good_match     = LZX_MAX_MATCH_LEN,
-               .nice_match     = LZX_MAX_MATCH_LEN,
-               .max_chain_len  = LZX_MAX_MATCH_LEN,
-               .max_lazy_match = LZX_MAX_MATCH_LEN,
-               .too_far        = 4096,
-       };
-
-       /* Initialize symbol frequencies and match offset LRU queue.  */
-       memset(&record_ctx.freqs, 0, sizeof(struct lzx_freqs));
-       lzx_lru_queue_init(&record_ctx.queue);
-       record_ctx.items = ctx->chosen_items;
-
-       /* Determine series of matches/literals to output.  */
-       lz_analyze_block(ctx->window,
-                        ctx->window_size,
-                        lzx_record_match,
-                        lzx_record_literal,
-                        &record_ctx,
-                        &lzx_lz_params,
-                        ctx->prev_tab);
-
-       /* Set up block specification.  */
-       spec = &ctx->block_specs[0];
-       spec->block_type = LZX_BLOCKTYPE_ALIGNED;
-       spec->window_pos = 0;
-       spec->block_size = ctx->window_size;
-       spec->num_chosen_items = record_ctx.items - ctx->chosen_items;
-       spec->chosen_items = ctx->chosen_items;
-       lzx_make_huffman_codes(&record_ctx.freqs, &spec->codes,
-                              ctx->num_main_syms);
-       ctx->num_blocks = 1;
+       if (compression_level < 25) {
+               lzx_params->choose_item_func = lzx_choose_lazy_item;
+               lzx_params->num_optim_passes  = 1;
+               if (max_window_size <= 262144)
+                       lzx_params->mf_algo = LZ_MF_HASH_CHAINS;
+               else
+                       lzx_params->mf_algo = LZ_MF_BINARY_TREES;
+               lzx_params->min_match_length  = 3;
+               lzx_params->nice_match_length = 25 + compression_level * 2;
+               lzx_params->max_search_depth  = 25 + compression_level;
+       } else {
+               lzx_params->choose_item_func = lzx_choose_near_optimal_item;
+               lzx_params->num_optim_passes  = compression_level / 20;
+               if (max_window_size <= 32768 && lzx_params->num_optim_passes == 1)
+                       lzx_params->mf_algo = LZ_MF_HASH_CHAINS;
+               else
+                       lzx_params->mf_algo = LZ_MF_BINARY_TREES;
+               lzx_params->min_match_length  = (compression_level >= 45) ? 2 : 3;
+               lzx_params->nice_match_length = min(((u64)compression_level * 32) / 50,
+                                                   LZX_MAX_MATCH_LEN);
+               lzx_params->max_search_depth  = min(((u64)compression_level * 50) / 50,
+                                                   LZX_MAX_MATCH_LEN);
+       }
  }
  
-static size_t
-lzx_compress(const void *uncompressed_data, size_t uncompressed_size,
-            void *compressed_data, size_t compressed_size_avail, void *_ctx)
+static void
+lzx_build_mf_params(const struct lzx_compressor_params *lzx_params,
+                   u32 max_window_size, struct lz_mf_params *mf_params)
  {
-       struct lzx_compressor *ctx = _ctx;
-       struct output_bitstream ostream;
-       size_t compressed_size;
-
-       if (uncompressed_size < 100) {
-               LZX_DEBUG("Too small to bother compressing.");
-               return 0;
-       }
-
-       if (uncompressed_size > ctx->max_window_size) {
-               LZX_DEBUG("Can't compress %zu bytes using window of %u bytes!",
-                         uncompressed_size, ctx->max_window_size);
-               return 0;
-       }
-
-       LZX_DEBUG("Attempting to compress %zu bytes...",
-                 uncompressed_size);
-
-       /* The input data must be preprocessed.  To avoid changing the original
-        * input, copy it to a temporary buffer.  */
-       memcpy(ctx->window, uncompressed_data, uncompressed_size);
-       ctx->window_size = uncompressed_size;
-
-       /* This line is unnecessary; it just avoids inconsequential accesses of
-        * uninitialized memory that would show up in memory-checking tools such
-        * as valgrind.  */
-       memset(&ctx->window[ctx->window_size], 0, 12);
-
-       LZX_DEBUG("Preprocessing data...");
-
-       /* Before doing any actual compression, do the call instruction (0xe8
-        * byte) translation on the uncompressed data.  */
-       lzx_do_e8_preprocessing(ctx->window, ctx->window_size);
-
-       LZX_DEBUG("Preparing blocks...");
-
-       /* Prepare the compressed data.  */
-       if (ctx->params.algorithm == WIMLIB_LZX_ALGORITHM_FAST)
-               lzx_prepare_block_fast(ctx);
-       else
-               lzx_prepare_blocks(ctx);
+       memset(mf_params, 0, sizeof(*mf_params));
+
+       mf_params->algorithm = lzx_params->mf_algo;
+       mf_params->max_window_size = max_window_size;
+       mf_params->min_match_len = lzx_params->min_match_length;
+       mf_params->max_match_len = LZX_MAX_MATCH_LEN;
+       mf_params->max_search_depth = lzx_params->max_search_depth;
+       mf_params->nice_match_len = lzx_params->nice_match_length;
+}
  
-       LZX_DEBUG("Writing compressed blocks...");
+static void
+lzx_free_compressor(void *_c);
  
-       /* Generate the compressed data.  */
-       init_output_bitstream(&ostream, compressed_data, compressed_size_avail);
-       lzx_write_all_blocks(ctx, &ostream);
+static u64
+lzx_get_needed_memory(size_t max_window_size, unsigned int compression_level)
+{
+       struct lzx_compressor_params params;
+       u64 size = 0;
  
-       LZX_DEBUG("Flushing bitstream...");
-       compressed_size = flush_output_bitstream(&ostream);
-       if (compressed_size == (u32)~0UL) {
-               LZX_DEBUG("Data did not compress to %zu bytes or less!",
-                         compressed_size_avail);
+       if (!lzx_window_size_valid(max_window_size))
                 return 0;
-       }
  
-       LZX_DEBUG("Done: compressed %zu => %zu bytes.",
-                 uncompressed_size, compressed_size);
-
-       /* Verify that we really get the same thing back when decompressing.
-        * Although this could be disabled by default in all cases, it only
-        * takes around 2-3% of the running time of the slow algorithm to do the
-        * verification.  */
-       if (ctx->params.algorithm == WIMLIB_LZX_ALGORITHM_SLOW
-       #if defined(ENABLE_LZX_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION)
-           || 1
-       #endif
-           )
-       {
-               struct wimlib_decompressor *decompressor;
+       lzx_build_params(compression_level, max_window_size, &params);
  
-               if (0 == wimlib_create_decompressor(WIMLIB_COMPRESSION_TYPE_LZX,
-                                                   ctx->max_window_size,
-                                                   NULL,
-                                                   &decompressor))
-               {
-                       int ret;
-                       ret = wimlib_decompress(compressed_data,
-                                               compressed_size,
-                                               ctx->window,
-                                               uncompressed_size,
-                                               decompressor);
-                       wimlib_free_decompressor(decompressor);
-
-                       if (ret) {
-                               ERROR("Failed to decompress data we "
-                                     "compressed using LZX algorithm");
-                               wimlib_assert(0);
-                               return 0;
-                       }
-                       if (memcmp(uncompressed_data, ctx->window, uncompressed_size)) {
-                               ERROR("Data we compressed using LZX algorithm "
-                                     "didn't decompress to original");
-                               wimlib_assert(0);
-                               return 0;
-                       }
-               } else {
-                       WARNING("Failed to create decompressor for "
-                               "data verification!");
-               }
-       }
-       return compressed_size;
-}
+       size += sizeof(struct lzx_compressor);
  
-static void
-lzx_free_compressor(void *_ctx)
-{
-       struct lzx_compressor *ctx = _ctx;
-
-       if (ctx) {
-               FREE(ctx->chosen_items);
-               FREE(ctx->cached_matches);
-               FREE(ctx->optimum);
-               lz_bt_destroy(&ctx->mf);
-               FREE(ctx->block_specs);
-               FREE(ctx->prev_tab);
-               FREE(ctx->window);
-               FREE(ctx);
-       }
-}
+       size += max_window_size;
  
-static const struct wimlib_lzx_compressor_params lzx_fast_default = {
-       .hdr = {
-               .size = sizeof(struct wimlib_lzx_compressor_params),
-       },
-       .algorithm = WIMLIB_LZX_ALGORITHM_FAST,
-       .use_defaults = 0,
-       .alg_params = {
-               .fast = {
-               },
-       },
-};
-static const struct wimlib_lzx_compressor_params lzx_slow_default = {
-       .hdr = {
-               .size = sizeof(struct wimlib_lzx_compressor_params),
-       },
-       .algorithm = WIMLIB_LZX_ALGORITHM_SLOW,
-       .use_defaults = 0,
-       .alg_params = {
-               .slow = {
-                       .use_len2_matches = 1,
-                       .nice_match_length = 32,
-                       .num_optim_passes = 2,
-                       .max_search_depth = 50,
-                       .main_nostat_cost = 15,
-                       .len_nostat_cost = 15,
-                       .aligned_nostat_cost = 7,
-               },
-       },
-};
+       size += DIV_ROUND_UP(max_window_size, LZX_DIV_BLOCK_SIZE) *
+               sizeof(struct lzx_block_spec);
  
-static const struct wimlib_lzx_compressor_params *
-lzx_get_params(const struct wimlib_compressor_params_header *_params)
-{
-       const struct wimlib_lzx_compressor_params *params =
-               (const struct wimlib_lzx_compressor_params*)_params;
+       size += max_window_size * sizeof(struct lzx_item);
  
-       if (params == NULL) {
-               LZX_DEBUG("Using default algorithm and parameters.");
-               params = &lzx_slow_default;
-       } else {
-               if (params->use_defaults) {
-                       if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW)
-                               params = &lzx_slow_default;
-                       else
-                               params = &lzx_fast_default;
-               }
+       size += lz_mf_get_needed_memory(params.mf_algo, max_window_size);
+       if (params.choose_item_func == lzx_choose_near_optimal_item) {
+               size += (LZX_OPTIM_ARRAY_LENGTH + params.nice_match_length) *
+                       sizeof(struct lzx_mc_pos_data);
         }
-       return params;
+       if (params.num_optim_passes > 1)
+               size += LZX_CACHE_LEN * sizeof(struct lz_match);
+       else
+               size += LZX_MAX_MATCHES_PER_POS * sizeof(struct lz_match);
+       return size;
  }
  
  static int
-lzx_create_compressor(size_t window_size,
-                     const struct wimlib_compressor_params_header *_params,
-                     void **ctx_ret)
+lzx_create_compressor(size_t max_window_size, unsigned int compression_level,
+                     void **c_ret)
  {
-       const struct wimlib_lzx_compressor_params *params = lzx_get_params(_params);
-       struct lzx_compressor *ctx;
+       struct lzx_compressor *c;
+       struct lzx_compressor_params params;
+       struct lz_mf_params mf_params;
  
-       LZX_DEBUG("Allocating LZX context...");
+       if (!lzx_window_size_valid(max_window_size))
+               return WIMLIB_ERR_INVALID_PARAM;
  
-       if (!lzx_window_size_valid(window_size))
+       lzx_build_params(compression_level, max_window_size, &params);
+       lzx_build_mf_params(&params, max_window_size, &mf_params);
+       if (!lz_mf_params_valid(&mf_params))
                 return WIMLIB_ERR_INVALID_PARAM;
  
-       ctx = CALLOC(1, sizeof(struct lzx_compressor));
-       if (ctx == NULL)
+       c = CALLOC(1, sizeof(struct lzx_compressor));
+       if (!c)
                 goto oom;
  
-       ctx->num_main_syms = lzx_get_num_main_syms(window_size);
-       ctx->max_window_size = window_size;
-       ctx->window = MALLOC(window_size + 12);
-       if (ctx->window == NULL)
+       c->params = params;
+       c->num_main_syms = lzx_get_num_main_syms(max_window_size);
+       c->max_window_size = max_window_size;
+
+       c->cur_window = ALIGNED_MALLOC(max_window_size, 16);
+       if (!c->cur_window)
                 goto oom;
  
-       if (params->algorithm == WIMLIB_LZX_ALGORITHM_FAST) {
-               ctx->prev_tab = MALLOC(window_size * sizeof(ctx->prev_tab[0]));
-               if (ctx->prev_tab == NULL)
-                       goto oom;
-       }
+       c->block_specs = MALLOC(DIV_ROUND_UP(max_window_size,
+                                            LZX_DIV_BLOCK_SIZE) *
+                               sizeof(struct lzx_block_spec));
+       if (!c->block_specs)
+               goto oom;
  
-       size_t block_specs_length = DIV_ROUND_UP(window_size, LZX_DIV_BLOCK_SIZE);
-       ctx->block_specs = MALLOC(block_specs_length * sizeof(ctx->block_specs[0]));
-       if (ctx->block_specs == NULL)
+       c->chosen_items = MALLOC(max_window_size * sizeof(struct lzx_item));
+       if (!c->chosen_items)
                 goto oom;
  
-       if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
-               unsigned min_match_len = LZX_MIN_MATCH_LEN;
-               if (!params->alg_params.slow.use_len2_matches)
-                       min_match_len = max(min_match_len, 3);
-
-               if (!lz_bt_init(&ctx->mf,
-                               window_size,
-                               min_match_len,
-                               LZX_MAX_MATCH_LEN,
-                               params->alg_params.slow.nice_match_length,
-                               params->alg_params.slow.max_search_depth))
-                       goto oom;
-       }
+       c->mf = lz_mf_alloc(&mf_params);
+       if (!c->mf)
+               goto oom;
  
-       if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
-               ctx->optimum = MALLOC((LZX_OPTIM_ARRAY_SIZE +
-                                      min(params->alg_params.slow.nice_match_length,
-                                          LZX_MAX_MATCH_LEN)) *
-                                               sizeof(ctx->optimum[0]));
-               if (ctx->optimum == NULL)
+       if (params.choose_item_func == lzx_choose_near_optimal_item) {
+               c->optimum = MALLOC((LZX_OPTIM_ARRAY_LENGTH +
+                                    params.nice_match_length) *
+                                   sizeof(struct lzx_mc_pos_data));
+               if (!c->optimum)
                         goto oom;
         }
  
-       if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
-               ctx->cached_matches = MALLOC(LZX_CACHE_SIZE);
-               if (ctx->cached_matches == NULL)
+       if (params.num_optim_passes > 1) {
+               c->cached_matches = MALLOC(LZX_CACHE_LEN *
+                                          sizeof(struct lz_match));
+               if (!c->cached_matches)
+                       goto oom;
+               c->cache_limit = c->cached_matches + LZX_CACHE_LEN -
+                                (LZX_MAX_MATCHES_PER_POS + 1);
+       } else {
+               c->cached_matches = MALLOC(LZX_MAX_MATCHES_PER_POS *
+                                          sizeof(struct lz_match));
+               if (!c->cached_matches)
                         goto oom;
-               ctx->cache_limit = ctx->cached_matches +
-                                  LZX_CACHE_LEN - (LZX_MAX_MATCHES_PER_POS + 1);
         }
  
-       ctx->chosen_items = MALLOC(window_size * sizeof(ctx->chosen_items[0]));
-       if (ctx->chosen_items == NULL)
-               goto oom;
-
-       memcpy(&ctx->params, params, sizeof(struct wimlib_lzx_compressor_params));
-       memset(&ctx->zero_codes, 0, sizeof(ctx->zero_codes));
-
-       LZX_DEBUG("Successfully allocated new LZX context.");
-
-       *ctx_ret = ctx;
+       *c_ret = c;
         return 0;
  
  oom:
-       lzx_free_compressor(ctx);
+       lzx_free_compressor(c);
         return WIMLIB_ERR_NOMEM;
  }
  
-static u64
-lzx_get_needed_memory(size_t max_block_size,
-                     const struct wimlib_compressor_params_header *_params)
+static size_t
+lzx_compress(const void *uncompressed_data, size_t uncompressed_size,
+            void *compressed_data, size_t compressed_size_avail, void *_c)
  {
-       const struct wimlib_lzx_compressor_params *params = lzx_get_params(_params);
-
-       u64 size = 0;
+       struct lzx_compressor *c = _c;
+       struct output_bitstream ostream;
+       size_t compressed_size;
  
-       size += sizeof(struct lzx_compressor);
+       if (uncompressed_size < 100) {
+               LZX_DEBUG("Too small to bother compressing.");
+               return 0;
+       }
  
-       size += max_block_size + 12;
+       LZX_DEBUG("Attempting to compress %zu bytes...",
+                 uncompressed_size);
  
-       size += DIV_ROUND_UP(max_block_size, LZX_DIV_BLOCK_SIZE) *
-               sizeof(((struct lzx_compressor*)0)->block_specs[0]);
+       /* The input data must be preprocessed.  To avoid changing the original
+        * input, copy it to a temporary buffer.  */
+       memcpy(c->cur_window, uncompressed_data, uncompressed_size);
+       c->cur_window_size = uncompressed_size;
  
-       if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW) {
-               size += max_block_size * sizeof(((struct lzx_compressor*)0)->chosen_items[0]);
-               size += lz_bt_get_needed_memory(max_block_size);
-               size += (LZX_OPTIM_ARRAY_SIZE +
-                        min(params->alg_params.slow.nice_match_length,
-                            LZX_MAX_MATCH_LEN)) *
-                               sizeof(((struct lzx_compressor *)0)->optimum[0]);
-               size += LZX_CACHE_SIZE;
-       } else {
-               size += max_block_size * sizeof(((struct lzx_compressor*)0)->prev_tab[0]);
-       }
-       return size;
-}
+       /* Before doing any actual compression, do the call instruction (0xe8
+        * byte) translation on the uncompressed data.  */
+       lzx_do_e8_preprocessing(c->cur_window, c->cur_window_size);
  
-static bool
-lzx_params_valid(const struct wimlib_compressor_params_header *_params)
-{
-       const struct wimlib_lzx_compressor_params *params =
-               (const struct wimlib_lzx_compressor_params*)_params;
+       /* Prepare the compressed data.  */
+       lzx_prepare_blocks(c);
  
-       if (params->hdr.size != sizeof(struct wimlib_lzx_compressor_params)) {
-               LZX_DEBUG("Invalid parameter structure size!");
-               return false;
-       }
+       /* Generate the compressed data.  */
+       init_output_bitstream(&ostream, compressed_data, compressed_size_avail);
+       lzx_write_all_blocks(c, &ostream);
  
-       if (params->algorithm != WIMLIB_LZX_ALGORITHM_SLOW &&
-           params->algorithm != WIMLIB_LZX_ALGORITHM_FAST)
-       {
-               LZX_DEBUG("Invalid algorithm.");
-               return false;
+       compressed_size = flush_output_bitstream(&ostream);
+       if (compressed_size == (u32)~0UL) {
+               LZX_DEBUG("Data did not compress to %zu bytes or less!",
+                         compressed_size_avail);
+               return 0;
         }
  
-       if (params->algorithm == WIMLIB_LZX_ALGORITHM_SLOW &&
-           !params->use_defaults)
-       {
-               if (params->alg_params.slow.num_optim_passes < 1)
-               {
-                       LZX_DEBUG("Invalid number of optimization passes!");
-                       return false;
-               }
-
-               if (params->alg_params.slow.main_nostat_cost < 1 ||
-                   params->alg_params.slow.main_nostat_cost > 16)
-               {
-                       LZX_DEBUG("Invalid main_nostat_cost!");
-                       return false;
-               }
+       LZX_DEBUG("Done: compressed %zu => %zu bytes.",
+                 uncompressed_size, compressed_size);
  
-               if (params->alg_params.slow.len_nostat_cost < 1 ||
-                   params->alg_params.slow.len_nostat_cost > 16)
-               {
-                       LZX_DEBUG("Invalid len_nostat_cost!");
-                       return false;
-               }
+       return compressed_size;
+}
  
-               if (params->alg_params.slow.aligned_nostat_cost < 1 ||
-                   params->alg_params.slow.aligned_nostat_cost > 8)
-               {
-                       LZX_DEBUG("Invalid aligned_nostat_cost!");
-                       return false;
-               }
+static void
+lzx_free_compressor(void *_c)
+{
+       struct lzx_compressor *c = _c;
+
+       if (c) {
+               ALIGNED_FREE(c->cur_window);
+               FREE(c->block_specs);
+               FREE(c->chosen_items);
+               lz_mf_free(c->mf);
+               FREE(c->optimum);
+               FREE(c->cached_matches);
+               FREE(c);
         }
-       return true;
  }
  
  const struct compressor_ops lzx_compressor_ops = {
-       .params_valid       = lzx_params_valid,
         .get_needed_memory  = lzx_get_needed_memory,
         .create_compressor  = lzx_create_compressor,
         .compress           = lzx_compress,
diff --git a/src/lzx-decompress.c b/src/lzx-decompress.c

index c98b84c038ed55a09c2e43c4be971422cb6ffcb8..16b46cb253a264fc8b32b20a744e01176401acee 100644 (file)
--- a/src/lzx-decompress.c
+++ b/src/lzx-decompress.c
@@ -794,9 +794,7 @@ lzx_free_decompressor(void *_ctx)
  }
  
  static int
-lzx_create_decompressor(size_t max_window_size,
-                       const struct wimlib_decompressor_params_header *params,
-                       void **ctx_ret)
+lzx_create_decompressor(size_t max_window_size, void **ctx_ret)
  {
         struct lzx_decompressor *ctx;
  
diff --git a/src/resource.c b/src/resource.c

index 53283f2c3c5b3edf632331024910098e48b9bcf5..4b1534ec5534b2632eb36181ca685e65c05a753a 100644 (file)
--- a/src/resource.c
+++ b/src/resource.c
@@ -207,7 +207,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec,
                 rspec->wim->decompressor_ctype = WIMLIB_COMPRESSION_TYPE_NONE;
                 rspec->wim->decompressor = NULL;
         } else {
-               ret = wimlib_create_decompressor(ctype, chunk_size, NULL,
+               ret = wimlib_create_decompressor(ctype, chunk_size,
                                                  &decompressor);
                 if (ret) {
                         if (ret != WIMLIB_ERR_NOMEM)
diff --git a/src/wim.c b/src/wim.c

index fc15a21db00c6a541091e770a29ca7eef8863260..3ab0266b9216a1d8cacf480cb40fc8d367a07466 100644 (file)
--- a/src/wim.c
+++ b/src/wim.c
@@ -38,8 +38,6 @@
  #include "wimlib/security.h"
  #include "wimlib/wim.h"
  #include "wimlib/xml.h"
-#include "wimlib/compressor_ops.h"
-#include "wimlib/decompressor_ops.h"
  #include "wimlib/version.h"
  
  #ifdef __WIN32__
@@ -66,7 +64,7 @@ static u32
  wim_default_pack_chunk_size(int ctype) {
         switch (ctype) {
         case WIMLIB_COMPRESSION_TYPE_LZMS:
-               return 1U << 26; /* 67108864  */
+               return 1U << 25; /* 33554432  */
         default:
                 return 1U << 15; /* 32768     */
         }
@@ -126,9 +124,8 @@ wim_chunk_size_valid(u32 chunk_size, int ctype)
         switch (ctype) {
         case WIMLIB_COMPRESSION_TYPE_LZX:
                 return order >= 15 && order <= 21;
-
         case WIMLIB_COMPRESSION_TYPE_XPRESS:
-               return order >= 12 && order <= 26;
+               return order >= 12 && order <= 16;
         case WIMLIB_COMPRESSION_TYPE_LZMS:
                 return order >= 15 && order <= 30;
         }
@@ -1069,8 +1066,6 @@ wimlib_global_cleanup(void)
  #ifdef __WIN32__
         win32_global_cleanup();
  #endif
-       cleanup_decompressor_params();
-       cleanup_compressor_params();
  
         wimlib_set_error_file(NULL);
         lib_initialized = false;
diff --git a/src/xpress-compress.c b/src/xpress-compress.c

index 46f683f1895c0517ad953c340b918b79b6c1b1a3..6eaa2e69bf9546f58c30de1b7966990247233b0d 100644 (file)
--- a/src/xpress-compress.c
+++ b/src/xpress-compress.c
@@ -1,9 +1,8 @@
  /*
   * xpress-compress.c
   *
- * XPRESS compression routines.
- *
- * See the comments in xpress-decompress.c about the XPRESS format.
+ * A compressor that produces output compatible with the XPRESS (Huffman
+ * version) compression format.
   */
  
  /*
@@ -29,57 +28,114 @@
  #  include "config.h"
  #endif
  
-#include "wimlib.h"
-#include "wimlib/assert.h"
  #include "wimlib/compressor_ops.h"
  #include "wimlib/compress_common.h"
  #include "wimlib/error.h"
-#include "wimlib/lz_hash.h"
+#include "wimlib/lz_mf.h"
  #include "wimlib/util.h"
  #include "wimlib/xpress.h"
  
  #include <string.h>
  
-struct xpress_record_ctx {
-       u32 freqs[XPRESS_NUM_SYMBOLS];
-       struct xpress_item *chosen_items;
+#define XPRESS_CACHE_PER_POS           8
+#define XPRESS_OPTIM_ARRAY_LENGTH      4096
+
+struct xpress_compressor;
+struct xpress_item;
+struct xpress_mc_pos_data;
+
+struct xpress_compressor_params {
+       struct lz_match (*choose_item_func)(struct xpress_compressor *);
+       u32 num_optim_passes;
+       enum lz_mf_algo mf_algo;
+       u32 nice_match_length;
+       u32 max_search_depth;
  };
  
+/* XPRESS compressor state.  */
  struct xpress_compressor {
-       u8 *window;
-       u32 max_window_size;
+
+       /* Parameters determined based on the compression level.  */
+       struct xpress_compressor_params params;
+
+       unsigned (*get_matches_func)(struct xpress_compressor *,
+                                    const struct lz_match **);
+       void (*skip_bytes_func)(struct xpress_compressor *, u32 n);
+       u32 len_3_too_far;
+
+       /* Data currently being compressed  */
+       const u8 *cur_window;
+       u32 cur_window_size;
+
+       /* Lempel-Ziv match-finder  */
+       struct lz_mf *mf;
+
+       const u8 *cur_window_ptr;
+
+       /* Match cache, used when doing multiple optimization passes.  */
+       struct lz_match *cached_matches;
+       struct lz_match *cache_ptr;
+       struct lz_match *cache_limit;
+
+       /* Optimal parsing data  */
+       struct xpress_mc_pos_data *optimum;
+       unsigned optimum_cur_idx;
+       unsigned optimum_end_idx;
+       u8 costs[XPRESS_NUM_SYMBOLS];
+
+       /* Lazy parsing data  */
+       struct lz_match prev_match;
+
+       /* The selected sequence of matches/literals  */
         struct xpress_item *chosen_items;
-       u32 *prev_tab;
+
+       /* Symbol frequency counters  */
+       u32 freqs[XPRESS_NUM_SYMBOLS];
+
+       /* The current Huffman code  */
         u32 codewords[XPRESS_NUM_SYMBOLS];
         u8 lens[XPRESS_NUM_SYMBOLS];
-       struct xpress_record_ctx record_ctx;
+};
+
+/* Match-chooser position data.
+ * See corresponding declaration in lzx-compress.c for more information.  */
+struct xpress_mc_pos_data {
+       u32 cost;
+#define MC_INFINITE_COST ((u32)~0UL)
+
+       union {
+               struct {
+                       u32 link;
+                       u32 match_offset;
+               } prev;
+               struct {
+                       u32 link;
+                       u32 match_offset;
+               } next;
+       };
  };
  
  /* Intermediate XPRESS match/literal representation.  */
  struct xpress_item {
         u16 adjusted_len;  /* Match length minus XPRESS_MIN_MATCH_LEN */
         u16 offset;        /* Match offset */
-       /* For literals, offset == 0 and adjusted_len is the literal.  */
+       /* For literals, offset == 0 and adjusted_len is the literal byte.  */
  };
  
-/*
- * Writes @match, which is a match given in the intermediate representation for
- * XPRESS matches, to the output stream @ostream.
- *
- * @codewords and @lens provide the Huffman code that is being used.
- */
+/* Output an XPRESS match.  */
  static void
-xpress_write_match(struct xpress_item match,
-                  struct output_bitstream *restrict ostream,
-                  const u32 codewords[restrict],
-                  const u8 lens[restrict])
+xpress_write_match(struct xpress_item match, struct output_bitstream *ostream,
+                  const u32 codewords[], const u8 lens[])
  {
-       u8 len_hdr = min(match.adjusted_len, 0xf);
-       u8 offset_bsr = bsr32(match.offset);
+       unsigned len_hdr = min(match.adjusted_len, 0xf);
+       unsigned offset_bsr = bsr32(match.offset);
         unsigned sym = XPRESS_NUM_CHARS + ((offset_bsr << 4) | len_hdr);
  
+       /* Huffman symbol  */
         bitstream_put_bits(ostream, codewords[sym], lens[sym]);
  
+       /* If length >= 18, one extra length byte.
+        * If length >= 273, three (total) extra length bytes.  */
         if (match.adjusted_len >= 0xf) {
                 u8 byte1 = min(match.adjusted_len - 0xf, 0xff);
                 bitstream_put_byte(ostream, byte1);
@@ -88,15 +144,16 @@ xpress_write_match(struct xpress_item match,
                         bitstream_put_byte(ostream, match.adjusted_len >> 8);
                 }
         }
+
+       /* Offset bits  */
         bitstream_put_bits(ostream, match.offset ^ (1U << offset_bsr), offset_bsr);
  }
  
+/* Output a sequence of XPRESS matches and literals.  */
  static void
  xpress_write_items(struct output_bitstream *ostream,
-                  const struct xpress_item items[restrict],
-                  u32 num_items,
-                  const u32 codewords[restrict],
-                  const u8 lens[restrict])
+                  const struct xpress_item items[], u32 num_items,
+                  const u32 codewords[], const u8 lens[])
  {
         for (u32 i = 0; i < num_items; i++) {
                 if (items[i].offset) {
@@ -104,204 +161,824 @@ xpress_write_items(struct output_bitstream *ostream,
                         xpress_write_match(items[i], ostream, codewords, lens);
                 } else {
                         /* Literal  */
-                       u8 lit = items[i].adjusted_len;
+                       unsigned lit = items[i].adjusted_len;
                         bitstream_put_bits(ostream, codewords[lit], lens[lit]);
                 }
         }
+       /* End-of-data symbol (required for MS compatibility)  */
         bitstream_put_bits(ostream, codewords[XPRESS_END_OF_DATA], lens[XPRESS_END_OF_DATA]);
  }
  
+/* Make the Huffman code for XPRESS.
+ *
+ * Takes as input c->freqs and produces as output c->lens and c->codewords.  */
  static void
-xpress_record_literal(u8 lit, void *_ctx)
+xpress_make_huffman_code(struct xpress_compressor *c)
  {
-       struct xpress_record_ctx *ctx = _ctx;
-       ctx->freqs[lit]++;
-       *(ctx->chosen_items++) =
-               (struct xpress_item) { .offset = 0, .adjusted_len = lit };
+       make_canonical_huffman_code(XPRESS_NUM_SYMBOLS, XPRESS_MAX_CODEWORD_LEN,
+                                   c->freqs, c->lens, c->codewords);
  }
  
-static void
-xpress_record_match(unsigned len, unsigned offset, void *_ctx)
+/* Account for the Huffman symbol that would be produced by outputting the
+ * specified literal.  Returns the intermediate representation of the literal.
+ */
+static inline struct xpress_item
+xpress_tally_literal(u8 lit, u32 freqs[])
  {
-       struct xpress_record_ctx *ctx = _ctx;
-
-       XPRESS_ASSERT(len >= XPRESS_MIN_MATCH_LEN);
-       XPRESS_ASSERT(len <= XPRESS_MAX_MATCH_LEN);
-       XPRESS_ASSERT(offset >= XPRESS_MIN_OFFSET);
-       XPRESS_ASSERT(offset <= XPRESS_MAX_OFFSET);
+       freqs[lit]++;
+       return (struct xpress_item) { .offset = 0, .adjusted_len = lit };
+}
  
-       unsigned adjusted_len = len - XPRESS_MIN_MATCH_LEN;
+/* Account for the Huffman symbol that would be produced by outputting the
+ * specified match.  Returns the intermediate representation of the match.  */
+static inline struct xpress_item
+xpress_tally_match(u32 len, u32 offset, u32 freqs[])
+{
+       u32 adjusted_len = len - XPRESS_MIN_MATCH_LEN;
         unsigned len_hdr = min(adjusted_len, 0xf);
         unsigned sym = XPRESS_NUM_CHARS + ((bsr32(offset) << 4) | len_hdr);
  
-       XPRESS_ASSERT(sym >= XPRESS_NUM_CHARS);
-       XPRESS_ASSERT(sym < XPRESS_NUM_SYMBOLS);
+       freqs[sym]++;
+       return (struct xpress_item) { .offset = offset,
+                                     .adjusted_len = adjusted_len };
+}
  
-       ctx->freqs[sym]++;
-       *(ctx->chosen_items++) =
-               (struct xpress_item) { .offset = offset,
-                                      .adjusted_len = adjusted_len };
+static unsigned
+xpress_get_matches_fillcache(struct xpress_compressor *c,
+                            const struct lz_match **matches_ret)
+{
+       struct lz_match *cache_ptr;
+       struct lz_match *matches;
+       unsigned num_matches;
+
+       cache_ptr = c->cache_ptr;
+       matches = cache_ptr + 1;
+       if (likely(cache_ptr <= c->cache_limit)) {
+               num_matches = lz_mf_get_matches(c->mf, matches);
+               cache_ptr->len = num_matches;
+               c->cache_ptr = matches + num_matches;
+       } else {
+               num_matches = 0;
+       }
+       c->cur_window_ptr++;
+       *matches_ret = matches;
+       return num_matches;
  }
  
-static const struct lz_params xpress_lz_params = {
-       .min_match      = XPRESS_MIN_MATCH_LEN,
-       .max_match      = XPRESS_MAX_MATCH_LEN,
-       .max_offset     = XPRESS_MAX_OFFSET,
-       .good_match     = 16,
-       .nice_match     = 32,
-       .max_chain_len  = 16,
-       .max_lazy_match = 16,
-       .too_far        = 4096,
-};
+static unsigned
+xpress_get_matches_usecache(struct xpress_compressor *c,
+                           const struct lz_match **matches_ret)
+{
+       struct lz_match *cache_ptr;
+       struct lz_match *matches;
+       unsigned num_matches;
  
-static size_t
-xpress_compress(const void *uncompressed_data, size_t uncompressed_size,
-               void *compressed_data, size_t compressed_size_avail, void *_c)
+       cache_ptr = c->cache_ptr;
+       matches = cache_ptr + 1;
+       if (likely(cache_ptr <= c->cache_limit)) {
+               num_matches = cache_ptr->len;
+               c->cache_ptr = matches + num_matches;
+       } else {
+               num_matches = 0;
+       }
+       c->cur_window_ptr++;
+       *matches_ret = matches;
+       return num_matches;
+}
+
+static unsigned
+xpress_get_matches_usecache_nocheck(struct xpress_compressor *c,
+                                   const struct lz_match **matches_ret)
  {
-       struct xpress_compressor *c = _c;
-       u8 *cptr = compressed_data;
-       struct output_bitstream ostream;
-       u32 num_chosen_items;
-       u32 i;
-       size_t compressed_size;
+       struct lz_match *cache_ptr;
+       struct lz_match *matches;
+       unsigned num_matches;
  
-       /* XPRESS requires 256 bytes of overhead for the Huffman code, so it's
-        * impossible to compress 256 bytes or less of data to less than the
-        * input size.
-        *
-        * +1 to take into account that the buffer for compressed data is 1 byte
-        * smaller than the buffer for uncompressed data.
-        *
-        * +4 to take into account that init_output_bitstream() requires at
-        * least 4 bytes of data.  */
-       if (compressed_size_avail < XPRESS_NUM_SYMBOLS / 2 + 1 + 4)
-               return 0;
+       cache_ptr = c->cache_ptr;
+       matches = cache_ptr + 1;
+       num_matches = cache_ptr->len;
+       c->cache_ptr = matches + num_matches;
+       c->cur_window_ptr++;
+       *matches_ret = matches;
+       return num_matches;
+}
  
-       /* Copy the data to a temporary buffer, but only to avoid
-        * inconsequential accesses of uninitialized memory in
-        * lz_analyze_block().  */
-       memcpy(c->window, uncompressed_data, uncompressed_size);
-       memset(c->window + uncompressed_size, 0, 8);
+static unsigned
+xpress_get_matches_noncaching(struct xpress_compressor *c,
+                             const struct lz_match **matches_ret)
+{
+       c->cur_window_ptr++;
+       *matches_ret = c->cached_matches;
+       return lz_mf_get_matches(c->mf, c->cached_matches);
+}
  
-       /* Determine match/literal sequence to divide the data into.  */
-       memset(c->record_ctx.freqs, 0, sizeof(c->record_ctx.freqs));
-       c->record_ctx.chosen_items = c->chosen_items;
-       lz_analyze_block(c->window,
-                        uncompressed_size,
-                        xpress_record_match,
-                        xpress_record_literal,
-                        &c->record_ctx,
-                        &xpress_lz_params,
-                        c->prev_tab);
-
-       num_chosen_items = (c->record_ctx.chosen_items - c->chosen_items);
-
-       /* Account for end of data symbol.  */
-       c->record_ctx.freqs[XPRESS_END_OF_DATA]++;
-
-       /* Build the Huffman code.  */
-       make_canonical_huffman_code(XPRESS_NUM_SYMBOLS, XPRESS_MAX_CODEWORD_LEN,
-                                   c->record_ctx.freqs, c->lens, c->codewords);
+/*
+ * Find matches at the next position in the window.
+ *
+ * Returns the number of matches found and sets *matches_ret to point to the
+ * matches array.  The matches will be sorted by strictly increasing length and
+ * offset.
+ */
+static inline unsigned
+xpress_get_matches(struct xpress_compressor *c,
+                  const struct lz_match **matches_ret)
+{
+       return (*c->get_matches_func)(c, matches_ret);
+}
  
-       /* Output the Huffman code as a series of 512 4-bit lengths.  */
-       for (i = 0; i < XPRESS_NUM_SYMBOLS; i += 2)
-               *cptr++ = (c->lens[i] & 0xf) | (c->lens[i + 1] << 4);
+static void
+xpress_skip_bytes_fillcache(struct xpress_compressor *c, u32 n)
+{
+       struct lz_match *cache_ptr;
  
-       /* Output the encoded matches/literals.  */
-       init_output_bitstream(&ostream, cptr,
-                             compressed_size_avail - XPRESS_NUM_SYMBOLS / 2 - 1);
+       c->cur_window_ptr += n;
+       cache_ptr = c->cache_ptr;
+       lz_mf_skip_positions(c->mf, n);
+       if (likely(cache_ptr <= c->cache_limit)) {
+               do {
+                       cache_ptr->len = 0;
+                       cache_ptr += 1;
+               } while (--n && likely(cache_ptr <= c->cache_limit));
+       }
+       c->cache_ptr = cache_ptr;
+}
  
-       xpress_write_items(&ostream, c->chosen_items, num_chosen_items,
-                          c->codewords, c->lens);
+static void
+xpress_skip_bytes_usecache(struct xpress_compressor *c, u32 n)
+{      /* Skip n positions during a cache-reading pass.  The match-finder
+        * is not called; instead the cache cursor is stepped over one
+        * cached record per skipped position.  Stepping stops once the
+        * cursor has moved past c->cache_limit.  n must be nonzero (the
+        * do/while tests --n only after the first iteration).  */
+       struct lz_match *cache_ptr;
  
-       /* Flush any pending data and get the length of the compressed data.  */
-       compressed_size = flush_output_bitstream(&ostream);
-       if (compressed_size == (u32)~0UL)
-               return 0;
+       c->cur_window_ptr += n;
+       cache_ptr = c->cache_ptr;
+       if (likely(cache_ptr <= c->cache_limit)) {
+               do {
+                       cache_ptr += 1 + cache_ptr->len;        /* one entry plus 'len' more */
+               } while (--n && likely(cache_ptr <= c->cache_limit));
+       }
+       c->cache_ptr = cache_ptr;
+}
+
+static void
+xpress_skip_bytes_usecache_nocheck(struct xpress_compressor *c, u32 n)
+{      /* Same stepping as xpress_skip_bytes_usecache(), but without any
+        * comparison against c->cache_limit.  xpress_choose_items() installs
+        * this variant only when a pass ended with
+        * c->cache_ptr <= c->cache_limit.  n must be nonzero.  */
+       struct lz_match *cache_ptr;
+
+       c->cur_window_ptr += n;
+       cache_ptr = c->cache_ptr;
+       do {
+               cache_ptr += 1 + cache_ptr->len;        /* one entry plus 'len' more */
+       } while (--n);
+       c->cache_ptr = cache_ptr;
+}
+
+static void
+xpress_skip_bytes_noncaching(struct xpress_compressor *c, u32 n)
+{      /* Skip n positions when no match cache is in use: advance the
+        * window pointer and have the match-finder skip the same count.  */
+       c->cur_window_ptr += n;
+       lz_mf_skip_positions(c->mf, n);
+}
+
+/*
+ * Skip the specified number of positions in the window (don't search for
+ * matches at them).
+ *
+ * Dispatches to the implementation currently installed in
+ * c->skip_bytes_func.  The cache-based implementations use a do/while loop
+ * that tests --n only after the first iteration, so n must be nonzero.
+ */
+static inline void
+xpress_skip_bytes(struct xpress_compressor *c, u32 n)
+{
+       /* Plain call: a void function must not 'return' an expression.  */
+       (*c->skip_bytes_func)(c, n);
+}
+
+/*
+ * Returns the cost, in bits, required to output the literal from the previous
+ * window position (the position at which matches were last searched).
+ *
+ * The cost is looked up in c->costs[], indexed by the byte value stored
+ * immediately before c->cur_window_ptr.  The window pointer must therefore
+ * already have advanced past at least one byte when this is called.
+ */
+static inline u32
+xpress_prev_literal_cost(const struct xpress_compressor *c)
+{
+       return c->costs[*(c->cur_window_ptr - 1)];
+}
+
+/*
+ * Reverse the linked list of near-optimal matches so that they can be returned
+ * in forwards order.
+ *
+ * Returns the first match in the list.
+ *
+ * @cur_pos is the index in c->optimum[] of the last node of the chosen path.
+ * Starting there, the 'prev' links are followed back to index 0 and, for each
+ * predecessor, 'next.link' / 'next.match_offset' are written so the path can
+ * be walked forwards.  c->optimum_end_idx is set to @cur_pos and
+ * c->optimum_cur_idx to the end of the first item.  In the returned value,
+ * 'len' is the distance from index 0 to that first link.
+ *
+ * @cur_pos must be nonzero: the loop body runs once before the
+ * 'cur_pos != 0' test.
+ */
+static struct lz_match
+xpress_match_chooser_reverse_list(struct xpress_compressor *c, unsigned cur_pos)
+{
+       unsigned prev_link, saved_prev_link;
+       u32 prev_match_offset, saved_prev_match_offset;
+
+       c->optimum_end_idx = cur_pos;
+
+       saved_prev_link = c->optimum[cur_pos].prev.link;
+       saved_prev_match_offset = c->optimum[cur_pos].prev.match_offset;
+
+       do {
+               prev_link = saved_prev_link;
+               prev_match_offset = saved_prev_match_offset;
+
+               saved_prev_link = c->optimum[prev_link].prev.link;      /* read 'prev' before 'next' is written; */
+               saved_prev_match_offset = c->optimum[prev_link].prev.match_offset;      /* NOTE(review): presumably they share storage - confirm */
  
-       compressed_size += XPRESS_NUM_SYMBOLS / 2;
+               c->optimum[prev_link].next.link = cur_pos;
+               c->optimum[prev_link].next.match_offset = prev_match_offset;
+
+               cur_pos = prev_link;
+       } while (cur_pos != 0);
+
+       c->optimum_cur_idx = c->optimum[0].next.link;
+
+       return (struct lz_match)
+               { .len = c->optimum_cur_idx,
+                 .offset = c->optimum[0].next.match_offset,
+               };
+}
+
+/*
+ * Near-optimal parsing.
+ *
+ * This does a forward lowest-cost path search.  The search is terminated when a
+ * sufficiently long match is found, when the search reaches a position with no
+ * alternatives, or when the temporary 'optimum' array fills up.  After
+ * termination of the search, matches/literals will be returned one by one by
+ * successive calls to this function.  Once all the matches/literals are used
+ * up, the next call to this function will begin a new search.
+ *
+ * Bookkeeping: optimum[i] describes the cheapest way found so far to reach
+ * the position i bytes past the start of the current search.  'cost' is
+ * accumulated from c->costs[]; 'prev.link' is the index the step came from
+ * and, for a match step, 'prev.match_offset' is its offset.
+ * xpress_match_chooser_reverse_list() converts the 'prev' links into 'next'
+ * links, and c->optimum_cur_idx / c->optimum_end_idx track which of those
+ * items have already been handed out.
+ *
+ * Match cost as computed below: c->costs[sym] + bsr32(offset), where
+ * sym = XPRESS_NUM_CHARS + ((bsr32(offset) << 4) | min(adjusted_len, 0xf)) and
+ * adjusted_len = len - XPRESS_MIN_MATCH_LEN; plus 8 when adjusted_len >= 0xf,
+ * plus a further 16 when adjusted_len - 0xf >= 0xff.  (NOTE(review): these
+ * additions presumably mirror the extra offset/length bits of the output
+ * format - confirm against the item writer.)
+ *
+ * A returned item whose len is below XPRESS_MIN_MATCH_LEN (0 from the
+ * no-match path, 1 from a replayed literal step) is treated as a literal by
+ * xpress_choose_items().
+ */
+static struct lz_match
+xpress_choose_near_optimal_item(struct xpress_compressor *c)
+{
+       const struct lz_match *matches;
+       unsigned num_matches;
+       struct lz_match match;
+       unsigned cur_pos;
+       unsigned end_pos;
+       struct xpress_mc_pos_data * const optimum = c->optimum;
+
+       if (c->optimum_cur_idx != c->optimum_end_idx) {
+               /* Return previously computed match or literal.  */
+               match.len = optimum[c->optimum_cur_idx].next.link -
+                                   c->optimum_cur_idx;
+               match.offset = optimum[c->optimum_cur_idx].next.match_offset;
+
+               c->optimum_cur_idx = optimum[c->optimum_cur_idx].next.link;
+               return match;
+       }
+
+       c->optimum_cur_idx = 0;
+       c->optimum_end_idx = 0;
+
+       num_matches = xpress_get_matches(c, &matches);
+
+       if (num_matches == 0)
+               return (struct lz_match) {};    /* len 0: caller codes a literal */
+
+       if (matches[num_matches - 1].len >= c->params.nice_match_length) {
+               /* Take the long match immediately.  */
+               xpress_skip_bytes(c, matches[num_matches - 1].len - 1);
+               return matches[num_matches - 1];
+       }
+
+       /* Consider coding a literal.  */
+       optimum[1].cost = xpress_prev_literal_cost(c);
+       optimum[1].prev.link = 0;
+
+       optimum[2].cost = MC_INFINITE_COST;     /* the match loops below start writing at optimum[3] */
  
-#if defined(ENABLE_XPRESS_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION)
-       /* Verify that we really get the same thing back when decompressing.  */
         {
-               struct wimlib_decompressor *decompressor;
+               /* Consider coding a match.  Cost evaluation is hand-inlined so
+                * that we can do some performance hacks.  */
  
-               if (0 == wimlib_create_decompressor(WIMLIB_COMPRESSION_TYPE_XPRESS,
-                                                   c->max_window_size,
-                                                   NULL,
-                                                   &decompressor))
-               {
-                       int ret;
-                       ret = wimlib_decompress(compressed_data,
-                                               compressed_size,
-                                               c->window,
-                                               uncompressed_size,
-                                               decompressor);
-                       wimlib_free_decompressor(decompressor);
-
-                       if (ret) {
-                               ERROR("Failed to decompress data we "
-                                     "compressed using XPRESS algorithm");
-                               wimlib_assert(0);
-                               return 0;
+               unsigned i = 0;
+               unsigned len = 3;       /* NOTE(review): presumably equals XPRESS_MIN_MATCH_LEN - confirm */
+               struct xpress_mc_pos_data *optimum_ptr = &optimum[len];
+
+               if (matches[num_matches - 1].len < 0xf + XPRESS_MIN_MATCH_LEN) {
+                       /* Fast path: every length keeps len_hdr below 0xf, so
+                        * the symbol simply advances by one with each length.  */
+                       do {
+                               u32 offset = matches[i].offset;
+                               u32 offset_bsr = bsr32(offset);
+                               unsigned len_hdr = len - XPRESS_MIN_MATCH_LEN;
+                               unsigned sym = XPRESS_NUM_CHARS +
+                                               ((offset_bsr << 4) | len_hdr);
+                               do {
+                                       optimum_ptr->prev.link = 0;
+                                       optimum_ptr->prev.match_offset = offset;
+                                       optimum_ptr->cost = offset_bsr + c->costs[sym];
+                                       sym++;
+                                       optimum_ptr++;
+                               } while (++len <= matches[i].len);
+                       } while (++i != num_matches);
+               } else {
+                       /* General path: len_hdr is clamped to 0xf and the
+                        * extra-length surcharges are added per length.  */
+                       do {
+                               u32 offset = matches[i].offset;
+                               u32 offset_bsr = bsr32(offset);
+                               do {
+                                       u32 adjusted_len = len - XPRESS_MIN_MATCH_LEN;
+                                       unsigned len_hdr = min(adjusted_len, 0xf);
+                                       unsigned sym = XPRESS_NUM_CHARS +
+                                                       ((offset_bsr << 4) | len_hdr);
+                                       u32 cost = offset_bsr + c->costs[sym];
+                                       if (adjusted_len >= 0xf) {
+                                               cost += 8;
+                                               if (adjusted_len - 0xf >= 0xff)
+                                                       cost += 16;
+                                       }
+
+                                       optimum_ptr->prev.link = 0;
+                                       optimum_ptr->prev.match_offset = offset;
+                                       optimum_ptr->cost = cost;
+                                       optimum_ptr++;
+                               } while (++len <= matches[i].len);
+                       } while (++i != num_matches);
+               }
+       }
+
+       end_pos = matches[num_matches - 1].len; /* furthest index given a cost so far */
+       cur_pos = 1;
+       do {
+               u32 cost;
+               u32 longest_len;
+
+               num_matches = xpress_get_matches(c, &matches);
+
+               if (num_matches) {
+                       longest_len = matches[num_matches - 1].len;
+                       if (longest_len >= c->params.nice_match_length) {
+                               /* Take the long match immediately.  */
+                               match = xpress_match_chooser_reverse_list(c, cur_pos);
+
+                               optimum[cur_pos].next.match_offset =    /* append the long match as the final item */
+                                       matches[num_matches - 1].offset;
+                               optimum[cur_pos].next.link = cur_pos + longest_len;
+                               c->optimum_end_idx = cur_pos + longest_len;
+
+                               xpress_skip_bytes(c, longest_len - 1);
+
+                               return match;
                         }
-                       if (memcmp(uncompressed_data, c->window,
-                                  uncompressed_size))
-                       {
-                               ERROR("Data we compressed using XPRESS algorithm "
-                                     "didn't decompress to original");
-                               wimlib_assert(0);
-                               return 0;
+               } else {
+                       longest_len = 1;
+               }
+
+               while (end_pos < cur_pos + longest_len) /* newly reachable slots start at infinite cost */
+                       optimum[++end_pos].cost = MC_INFINITE_COST;
+
+               /* Consider coding a literal.  */
+               cost = optimum[cur_pos].cost + xpress_prev_literal_cost(c);
+               if (cost < optimum[cur_pos + 1].cost) {
+                       optimum[cur_pos + 1].cost = cost;
+                       optimum[cur_pos + 1].prev.link = cur_pos;
+               }
+
+               if (num_matches) {
+                       /* Consider coding a match.  Cost evaluation is
+                        * hand-inlined so that we can do some performance
+                        * hacks.  */
+                       unsigned i = 0;
+                       unsigned len = 3;
+                       struct xpress_mc_pos_data *optimum_ptr = &optimum[cur_pos + 3];
+                       u32 cur_cost = optimum[cur_pos].cost;
+
+                       if (matches[num_matches - 1].len < 0xf + XPRESS_MIN_MATCH_LEN) {
+                               do {
+                                       u32 offset = matches[i].offset;
+                                       u32 offset_bsr = bsr32(offset);
+                                       unsigned len_hdr = len - XPRESS_MIN_MATCH_LEN;
+                                       unsigned sym = XPRESS_NUM_CHARS +
+                                                       ((offset_bsr << 4) | len_hdr);
+
+                                       u32 base_cost = cur_cost + offset_bsr;
+                                       do {
+                                               cost = base_cost + c->costs[sym];
+                                               if (cost < optimum_ptr->cost) { /* keep only strict improvements */
+                                                       optimum_ptr->prev.link = cur_pos;
+                                                       optimum_ptr->prev.match_offset = offset;
+                                                       optimum_ptr->cost = cost;
+                                               }
+                                               sym++;
+                                               optimum_ptr++;
+                                       } while (++len <= matches[i].len);
+                               } while (++i != num_matches);
+                       } else {
+                               do {
+                                       u32 offset = matches[i].offset;
+                                       u32 offset_bsr = bsr32(offset);
+
+                                       u32 base_cost = cur_cost + offset_bsr;
+                                       do {
+                                               u32 adjusted_len = len - XPRESS_MIN_MATCH_LEN;
+                                               unsigned len_hdr = min(adjusted_len, 0xf);
+                                               unsigned sym = XPRESS_NUM_CHARS +
+                                                               ((offset_bsr << 4) | len_hdr);
+
+                                               cost = base_cost + c->costs[sym];
+                                               if (adjusted_len >= 0xf) {
+                                                       cost += 8;
+                                                       if (adjusted_len - 0xf >= 0xff)
+                                                               cost += 16;
+                                               }
+
+                                               if (cost < optimum_ptr->cost) {
+                                                       optimum_ptr->prev.link = cur_pos;
+                                                       optimum_ptr->prev.match_offset = offset;
+                                                       optimum_ptr->cost = cost;
+                                               }
+                                               optimum_ptr++;
+                                       } while (++len <= matches[i].len);
+                               } while (++i != num_matches);
+                       }
+               }
+
+               cur_pos++;
+
+       } while (cur_pos != end_pos && cur_pos != XPRESS_OPTIM_ARRAY_LENGTH);   /* stop at the frontier or when the array is full */
+
+       return xpress_match_chooser_reverse_list(c, cur_pos);
+}
+
+/* Lazy parsing.
+ *
+ * Takes the longest match at the current position unless the next position
+ * offers a strictly longer one; in that case a literal is returned now and
+ * the longer match is remembered in c->prev_match for the following call.
+ * A longest match of length 3 whose offset is >= c->len_3_too_far is treated
+ * the same as finding no match.  A match of at least
+ * c->params.nice_match_length is taken without looking ahead.
+ *
+ * A returned len of 0 means "code a literal".  */
+static struct lz_match
+xpress_choose_lazy_item(struct xpress_compressor *c)
+{
+       const struct lz_match *matches;
+       struct lz_match cur_match;
+       struct lz_match next_match;
+       u32 num_matches;
+
+       if (c->prev_match.len) {
+               cur_match = c->prev_match;      /* match deferred by the previous call */
+               c->prev_match.len = 0;
+       } else {
+               num_matches = xpress_get_matches(c, &matches);
+               if (num_matches == 0 ||
+                   (matches[num_matches - 1].len == 3 &&
+                    matches[num_matches - 1].offset >= c->len_3_too_far))
+               {
+                       cur_match.len = 0;
+                       return cur_match;
+               }
+
+               /* With lazy parsing we only consider the longest match at each
+                * position.  */
+               cur_match = matches[num_matches - 1];
+       }
+
+       if (cur_match.len >= c->params.nice_match_length) {
+               xpress_skip_bytes(c, cur_match.len - 1);        /* the match's first position was already searched */
+               return cur_match;
+       }
+
+       num_matches = xpress_get_matches(c, &matches);  /* look one position ahead */
+       if (num_matches == 0 ||
+           (matches[num_matches - 1].len == 3 &&
+            matches[num_matches - 1].offset >= c->len_3_too_far))
+       {
+               xpress_skip_bytes(c, cur_match.len - 2);        /* two positions were already searched */
+               return cur_match;
+       }
+
+       next_match = matches[num_matches - 1];
+
+       if (next_match.len <= cur_match.len) {
+               xpress_skip_bytes(c, cur_match.len - 2);
+               return cur_match;
+       } else {
+               /* Longer match at next position.  Choose a literal here so we
+                * will get to use the longer match.  */
+               c->prev_match = next_match;
+               cur_match.len = 0;
+               return cur_match;
+       }
+}
+
+/* Greedy parsing.
+ *
+ * Always takes the longest match found at the current position and skips
+ * the rest of it.  The exception: when no match is found, or the longest
+ * match has length 3 and an offset >= c->len_3_too_far, a zero-initialized
+ * item (len 0, i.e. a literal) is returned instead.  */
+static struct lz_match
+xpress_choose_greedy_item(struct xpress_compressor *c)
+{
+       const struct lz_match *matches;
+       u32 num_matches;
+
+       num_matches = xpress_get_matches(c, &matches);
+       if (num_matches == 0 ||
+           (matches[num_matches - 1].len == 3 &&
+            matches[num_matches - 1].offset >= c->len_3_too_far))
+               return (struct lz_match) {};
+
+       xpress_skip_bytes(c, matches[num_matches - 1].len - 1); /* this position was already searched */
+       return matches[num_matches - 1];
+}
+
+/* Always choose a literal.
+ *
+ * Returns a zero-initialized item (len 0) without consulting the
+ * match-finder; @c is unused.  xpress_build_params() selects this chooser
+ * for compression level 1 ("Huffman only").  */
+static struct lz_match
+xpress_choose_literal(struct xpress_compressor *c)
+{
+       return (struct lz_match) {};
+}
+
+/*
+ * Return the next match or literal to use, delegating to the currently selected
+ * match-choosing algorithm.
+ *
+ * If the length of the returned 'struct lz_match' is less than
+ * XPRESS_MIN_MATCH_LEN, then it is really a literal.
+ *
+ * The algorithm is the function stored in c->params.choose_item_func, which
+ * xpress_build_params() fills in from the compression level.
+ */
+static inline struct lz_match
+xpress_choose_item(struct xpress_compressor *c)
+{
+       return (*c->params.choose_item_func)(c);
+}
+
+/* Set default XPRESS Huffman symbol costs to kick-start the iterative
+ * optimization algorithm.
+ *
+ * Symbols with index below XPRESS_NUM_CHARS get cost 8; the remaining
+ * symbols, up to XPRESS_NUM_SYMBOLS, get cost 10.  @costs must have room
+ * for XPRESS_NUM_SYMBOLS entries.  */
+static void
+xpress_set_default_costs(u8 costs[])
+{
+       unsigned i;
+
+       for (i = 0; i < XPRESS_NUM_CHARS; i++)
+               costs[i] = 8;
+
+       for (; i < XPRESS_NUM_SYMBOLS; i++)     /* continues from i == XPRESS_NUM_CHARS */
+               costs[i] = 10;
+}
+
+/* Copy the Huffman codeword lengths array @lens to the Huffman symbol costs
+ * array @costs, but also assign a default cost to each 0-length (unused)
+ * codeword.
+ *
+ * The default assigned to unused symbols is XPRESS_MAX_CODEWORD_LEN.  Both
+ * arrays are accessed at indices 0 .. XPRESS_NUM_SYMBOLS - 1.  */
+static void
+xpress_set_costs(u8 costs[], const u8 lens[])
+{
+       for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS; i++)
+               costs[i] = lens[i] ? lens[i] : XPRESS_MAX_CODEWORD_LEN;
+}
+
+/*
+ * Given the data to compress (c->cur_window, c->cur_window_size), fills in
+ * c->chosen_items with the intermediate representation of the match/literal
+ * sequence to output.  Also fills in c->codewords and c->lens to provide the
+ * Huffman code with which these items should be output.
+ *
+ * Returns the number of items written to c->chosen_items.  This can be at most
+ * c->cur_window_size.  (The worst case is all literals, no matches.)
+ *
+ * Structure: the first loop runs c->params.num_optim_passes - 1 preliminary
+ * passes.  Each one only tallies symbol frequencies, rebuilds the Huffman
+ * code and refreshes c->costs from the new codeword lengths.  The final pass
+ * additionally stores every chosen item.  When more than one pass is
+ * requested, the first pass uses the cache-filling match functions and later
+ * passes use the cache-reading ones.
+ *
+ * c->params.num_optim_passes must be at least 1, because the pass counter is
+ * pre-decremented before it is tested.
+ */
+static u32
+xpress_choose_items(struct xpress_compressor *c)
+{
+       u32 num_passes_remaining = c->params.num_optim_passes;
+       const u8 *window_ptr;
+       const u8 *window_end;
+       struct xpress_item *next_chosen_item;
+       struct lz_match raw_item;
+       struct xpress_item xpress_item;
+
+       if (c->params.choose_item_func == xpress_choose_near_optimal_item) {
+               xpress_set_default_costs(c->costs);
+               c->optimum_cur_idx = 0;
+               c->optimum_end_idx = 0;
+       } else {
+               c->prev_match.len = 0;
+               if (c->cur_window_size <= 8192) /* threshold used by the greedy/lazy length-3 check */
+                       c->len_3_too_far = 2048;
+               else
+                       c->len_3_too_far = 4096;
+       }
+
+       if (c->params.num_optim_passes > 1) {
+               c->get_matches_func = xpress_get_matches_fillcache;
+               c->skip_bytes_func = xpress_skip_bytes_fillcache;
+       } else {
+               c->get_matches_func = xpress_get_matches_noncaching;
+               c->skip_bytes_func = xpress_skip_bytes_noncaching;
+       }
+
+       lz_mf_load_window(c->mf, c->cur_window, c->cur_window_size);
+
+       while (--num_passes_remaining) {        /* preliminary passes: statistics only */
+               window_ptr = c->cur_window_ptr = c->cur_window;
+               window_end = window_ptr + c->cur_window_size;
+               c->cache_ptr = c->cached_matches;
+               memset(c->freqs, 0, sizeof(c->freqs));
+
+               while (window_ptr != window_end) {
+                       raw_item = xpress_choose_item(c);
+                       if (raw_item.len >= XPRESS_MIN_MATCH_LEN) {
+                               xpress_tally_match(raw_item.len,
+                                                  raw_item.offset, c->freqs);
+                               window_ptr += raw_item.len;
+                       } else {
+                               xpress_tally_literal(*window_ptr, c->freqs);
+                               window_ptr += 1;
                         }
+               }
+               c->freqs[XPRESS_END_OF_DATA]++;
+               xpress_make_huffman_code(c);
+               xpress_set_costs(c->costs, c->lens);
+               if (c->cache_ptr <= c->cache_limit) {   /* cursor never passed the limit: unchecked readers */
+                       c->get_matches_func = xpress_get_matches_usecache_nocheck;
+                       c->skip_bytes_func = xpress_skip_bytes_usecache_nocheck;
                 } else {
-                       WARNING("Failed to create decompressor for "
-                               "data verification!");
+                       c->get_matches_func = xpress_get_matches_usecache;
+                       c->skip_bytes_func = xpress_skip_bytes_usecache;
                 }
         }
-#endif
  
-       return compressed_size;
+       window_ptr = c->cur_window_ptr = c->cur_window; /* final pass: items are recorded */
+       window_end = window_ptr + c->cur_window_size;
+       c->cache_ptr = c->cached_matches;
+       memset(c->freqs, 0, sizeof(c->freqs));
+       next_chosen_item = c->chosen_items;
+
+       u32 unseen_cost = 9;    /* cost given to symbols without a codeword; +1 per rebuild, up to 15 */
+       while (window_ptr != window_end) {
+               raw_item = xpress_choose_item(c);
+               if (raw_item.len >= XPRESS_MIN_MATCH_LEN) {
+                       xpress_item = xpress_tally_match(raw_item.len,
+                                                        raw_item.offset,
+                                                        c->freqs);
+                       window_ptr += raw_item.len;
+               } else {
+                       xpress_item = xpress_tally_literal(*window_ptr,
+                                                          c->freqs);
+                       window_ptr += 1;
+               }
+               *next_chosen_item++ = xpress_item;
+
+               /* When doing one-pass near-optimal parsing, rebuild the Huffman
+                * code occasionally.  */
+               if (unlikely((next_chosen_item - c->chosen_items) % 2048 == 0) &&       /* every 2048 items */
+                   c->params.choose_item_func == xpress_choose_near_optimal_item &&
+                   c->cur_window_size >= 16384 &&
+                   c->params.num_optim_passes == 1)
+               {
+                       xpress_make_huffman_code(c);
+                       for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS; i++)
+                               c->costs[i] = c->lens[i] ? c->lens[i] : unseen_cost;
+                       if (unseen_cost < 15)
+                               unseen_cost++;
+               }
+       }
+       c->freqs[XPRESS_END_OF_DATA]++;
+       xpress_make_huffman_code(c);
+       return next_chosen_item - c->chosen_items;
  }
  
+/* Given the specified compression level and maximum window size, build the
+ * parameters to use for XPRESS compression.
+ *
+ * *xpress_params is zeroed first and then filled in by level:
+ *
+ *   level == 1  Huffman only: LZ_MF_NULL, xpress_choose_literal, 1 pass.
+ *   level < 30  Greedy: hash chains, nice_match_length = level,
+ *               max_search_depth = level / 2, 1 pass.
+ *   level < 60  Lazy: same settings as greedy, with xpress_choose_lazy_item.
+ *   otherwise   Near-optimal: binary trees when max_window_size >= 32768,
+ *               else hash chains; num_optim_passes = level / 40;
+ *               nice_match_length = min(level / 2, XPRESS_MAX_MATCH_LEN);
+ *               max_search_depth = min(level, XPRESS_MAX_MATCH_LEN).
+ *
+ * NOTE(review): level 0 is not special-cased and falls into the greedy
+ * branch with nice_match_length and max_search_depth both 0; presumably
+ * callers never pass 0 - confirm.  */
  static void
-xpress_free_compressor(void *_c)
+xpress_build_params(unsigned int compression_level, u32 max_window_size,
+                   struct xpress_compressor_params *xpress_params)
  {
-       struct xpress_compressor *c = _c;
+       memset(xpress_params, 0, sizeof(*xpress_params));
  
-       if (c) {
-               FREE(c->window);
-               FREE(c->chosen_items);
-               FREE(c->prev_tab);
-               FREE(c);
+       if (compression_level == 1) {
+
+               /* Huffman only (no Lempel-Ziv matches)  */
+               xpress_params->mf_algo = LZ_MF_NULL;
+               xpress_params->choose_item_func = xpress_choose_literal;
+               xpress_params->num_optim_passes = 1;
+
+       } else if (compression_level < 30) {
+
+               /* Greedy parsing  */
+               xpress_params->mf_algo = LZ_MF_HASH_CHAINS;
+               xpress_params->choose_item_func = xpress_choose_greedy_item;
+               xpress_params->num_optim_passes = 1;
+               xpress_params->nice_match_length = compression_level;
+               xpress_params->max_search_depth = compression_level / 2;
+
+       } else if (compression_level < 60) {
+
+               /* Lazy parsing  */
+               xpress_params->mf_algo = LZ_MF_HASH_CHAINS;
+               xpress_params->choose_item_func = xpress_choose_lazy_item;
+               xpress_params->num_optim_passes = 1;
+               xpress_params->nice_match_length = compression_level;
+               xpress_params->max_search_depth = compression_level / 2;
+
+       } else {
+
+               /* Near-optimal parsing  */
+               xpress_params->choose_item_func = xpress_choose_near_optimal_item;
+               if (max_window_size >= 32768)
+                       xpress_params->mf_algo = LZ_MF_BINARY_TREES;
+               else
+                       xpress_params->mf_algo = LZ_MF_HASH_CHAINS;
+               xpress_params->num_optim_passes = compression_level / 40;       /* at least 1, since level >= 60 here */
+               xpress_params->nice_match_length = min(compression_level / 2,
+                                                      XPRESS_MAX_MATCH_LEN);
+               xpress_params->max_search_depth = min(compression_level,
+                                                     XPRESS_MAX_MATCH_LEN);
+       }
+}
+
+/* Given the specified XPRESS parameters and maximum window size, build the
+ * parameters to use for match-finding.
+ *
+ * *mf_params is zeroed first.  The algorithm, search depth and nice match
+ * length are copied from @xpress_params; the minimum and maximum match
+ * lengths are fixed to XPRESS_MIN_MATCH_LEN and XPRESS_MAX_MATCH_LEN.  Any
+ * field not assigned here stays zero.  */
+static void
+xpress_build_mf_params(const struct xpress_compressor_params *xpress_params,
+                      u32 max_window_size, struct lz_mf_params *mf_params)
+{
+       memset(mf_params, 0, sizeof(*mf_params));
+
+       mf_params->algorithm = xpress_params->mf_algo;
+       mf_params->max_window_size = max_window_size;
+       mf_params->min_match_len = XPRESS_MIN_MATCH_LEN;
+       mf_params->max_match_len = XPRESS_MAX_MATCH_LEN;
+       mf_params->max_search_depth = xpress_params->max_search_depth;
+       mf_params->nice_match_len = xpress_params->nice_match_length;
+}
+
+static inline bool
+xpress_window_size_valid(size_t window_size)
+{      /* Valid sizes are 1 through XPRESS_MAX_OFFSET + 1 bytes, inclusive.  */
+       return (window_size > 0 && window_size <= XPRESS_MAX_OFFSET + 1);
+}
+
+static void
+xpress_free_compressor(void *_c);
+
+static u64
+xpress_get_needed_memory(size_t max_window_size, unsigned int compression_level)
+{      /* Return the number of bytes needed for a compressor with the given
+        * window size and level, or 0 if the window size is invalid.  The
+        * terms follow the allocations made in xpress_create_compressor():
+        * the compressor structure, the match-finder, the match cache, the
+        * 'optimum' array (near-optimal parsing only) and the chosen-items
+        * array.  */
+       u64 size = 0;
+       struct xpress_compressor_params params;
+
+       if (!xpress_window_size_valid(max_window_size))
+               return 0;
+
+       xpress_build_params(compression_level, max_window_size, &params);
+
+       size += sizeof(struct xpress_compressor);
+
+       size += lz_mf_get_needed_memory(params.mf_algo, max_window_size);
+
+       if (params.num_optim_passes > 1) {      /* multi-pass: full match cache */
+               size_t cache_len = max(max_window_size * XPRESS_CACHE_PER_POS,
+                                      params.max_search_depth + 1);
+               size += cache_len * sizeof(struct lz_match);
+       } else {        /* single pass: one array of max_search_depth matches */
+               size += params.max_search_depth * sizeof(struct lz_match);
+       }
+
+       if (params.choose_item_func == xpress_choose_near_optimal_item) {
+               size += (XPRESS_OPTIM_ARRAY_LENGTH + params.nice_match_length) *
+                                     sizeof(struct xpress_mc_pos_data);
         }
+
+       size += max_window_size * sizeof(struct xpress_item);
+
+       return size;
  }
  
  static int
-xpress_create_compressor(size_t max_window_size,
-                        const struct wimlib_compressor_params_header *params,
+xpress_create_compressor(size_t max_window_size, unsigned int compression_level,
                          void **c_ret)
  {
         struct xpress_compressor *c;
+       struct xpress_compressor_params params;
+       struct lz_mf_params mf_params;
  
-       if (max_window_size == 0 || max_window_size > (1U << 26))
+       if (!xpress_window_size_valid(max_window_size))
                 return WIMLIB_ERR_INVALID_PARAM;
  
+       xpress_build_params(compression_level, max_window_size, &params);
+       xpress_build_mf_params(&params, max_window_size, &mf_params);
+
         c = CALLOC(1, sizeof(struct xpress_compressor));
-       if (c == NULL)
+       if (!c)
                 goto oom;
  
-       c->window = MALLOC(max_window_size + 8);
-       if (c->window == NULL)
+       c->params = params;
+
+       c->mf = lz_mf_alloc(&mf_params);
+       if (!c->mf)
                 goto oom;
  
-       c->max_window_size = max_window_size;
+       if (params.num_optim_passes > 1) {
+               size_t cache_len = max(max_window_size * XPRESS_CACHE_PER_POS,
+                                      params.max_search_depth + 1);
+               c->cached_matches = MALLOC(cache_len * sizeof(struct lz_match));
+               if (!c->cached_matches)
+                       goto oom;
+               c->cache_limit = c->cached_matches + cache_len -
+                                  (params.max_search_depth + 1);
+       } else {
+               c->cached_matches = MALLOC(params.max_search_depth *
+                                          sizeof(struct lz_match));
+               if (!c->cached_matches)
+                       goto oom;
+       }
  
-       c->chosen_items = MALLOC(max_window_size * sizeof(c->chosen_items[0]));
-       if (c->chosen_items == NULL)
-               goto oom;
+       if (params.choose_item_func == xpress_choose_near_optimal_item) {
+               c->optimum = MALLOC((XPRESS_OPTIM_ARRAY_LENGTH +
+                                    params.nice_match_length) *
+                                     sizeof(struct xpress_mc_pos_data));
+               if (!c->optimum)
+                       goto oom;
+       }
  
-       c->prev_tab = MALLOC(max_window_size * sizeof(c->prev_tab[0]));
-       if (c->prev_tab == NULL)
+       c->chosen_items = MALLOC(max_window_size * sizeof(struct xpress_item));
+       if (!c->chosen_items)
                 goto oom;
  
         *c_ret = c;
@@ -312,18 +989,65 @@ oom:
         return WIMLIB_ERR_NOMEM;
  }
  
-static u64
-xpress_get_needed_memory(size_t max_window_size,
-                        const struct wimlib_compressor_params_header *params)
+static size_t
+xpress_compress(const void *uncompressed_data, size_t uncompressed_size,
+               void *compressed_data, size_t compressed_size_avail, void *_c)
  {
-       u64 size = 0;
+       struct xpress_compressor *c = _c;
+       u32 num_chosen_items;
+       u8 *cptr;
+       struct output_bitstream ostream;
+       u32 compressed_size;
  
-       size += sizeof(struct xpress_compressor);
-       size += max_window_size + 8;
-       size += max_window_size * sizeof(((struct xpress_compressor*)0)->chosen_items[0]);
-       size += max_window_size * sizeof(((struct xpress_compressor*)0)->prev_tab[0]);
+       /* XPRESS requires 256 bytes of overhead for the Huffman code, so it's
+        * impossible to compress 256 bytes or less of data to less than the
+        * input size.
+        *
+        * +1 to take into account that the buffer for compressed data is 1 byte
+        * smaller than the buffer for uncompressed data.
+        *
+        * +4 to take into account that init_output_bitstream() requires at
+        * least 4 bytes of data.  */
+       if (compressed_size_avail < XPRESS_NUM_SYMBOLS / 2 + 1 + 4)
+               return 0;
  
-       return size;
+       /* Determine match/literal sequence to divide the data into.  */
+       c->cur_window = uncompressed_data;
+       c->cur_window_size = uncompressed_size;
+       num_chosen_items = xpress_choose_items(c);
+
+       /* Output the Huffman code as a series of 512 4-bit lengths.  */
+       cptr = compressed_data;
+       for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS; i += 2)
+               *cptr++ = (c->lens[i] & 0xf) | (c->lens[i + 1] << 4);
+
+       /* Output the encoded matches/literals.  */
+       init_output_bitstream(&ostream, cptr,
+                             compressed_size_avail - XPRESS_NUM_SYMBOLS / 2 - 1);
+       xpress_write_items(&ostream, c->chosen_items, num_chosen_items,
+                          c->codewords, c->lens);
+
+       /* Flush any pending data and get the length of the compressed data.  */
+       compressed_size = flush_output_bitstream(&ostream);
+       if (compressed_size == (u32)~0UL)
+               return 0;
+
+       /* Return the length of the compressed data.  */
+       return compressed_size + XPRESS_NUM_SYMBOLS / 2;
+}
+
+static void
+xpress_free_compressor(void *_c)
+{
+       struct xpress_compressor *c = _c;
+
+       if (c) {
+               lz_mf_free(c->mf);
+               FREE(c->cached_matches);
+               FREE(c->optimum);
+               FREE(c->chosen_items);
+               FREE(c);
+       }
  }
  
  const struct compressor_ops xpress_compressor_ops = {
author	Eric Biggers <ebiggers3@gmail.com>
	Sat, 19 Jul 2014 22:11:59 +0000 (17:11 -0500)
committer	Eric Biggers <ebiggers3@gmail.com>
	Sat, 19 Jul 2014 22:14:31 +0000 (17:14 -0500)
Makefile.am		patch | blob | history
NEWS		patch | blob | history
README		patch | blob | history
doc/man1/imagex-capture.1.in		patch | blob | history
doc/man1/imagex-export.1.in		patch | blob | history
doc/man1/imagex-extract.1.in		patch | blob | history
doc/man1/imagex-mount.1.in		patch | blob | history
doc/man1/imagex-optimize.1.in		patch | blob | history
examples/Makefile		patch | blob | history
examples/compressfile.c		patch | blob | history
examples/decompressfile.c		patch | blob | history
examples/updatewim.c	[new file with mode: 0644]	patch | blob
include/wimlib.h		patch | blob | history
include/wimlib/compressor_ops.h		patch | blob | history
include/wimlib/decompressor_ops.h		patch | blob | history
include/wimlib/divsufsort.h	[new file with mode: 0644]	patch | blob
include/wimlib/lz.h	[deleted file]	patch | blob | history
include/wimlib/lz_bt.h	[deleted file]	patch | blob | history
include/wimlib/lz_hash.h	[deleted file]	patch | blob | history
include/wimlib/lz_mf.h	[new file with mode: 0644]	patch | blob
include/wimlib/lz_mf_ops.h	[new file with mode: 0644]	patch | blob
include/wimlib/lz_suffix_array_utils.h	[new file with mode: 0644]	patch | blob
include/wimlib/lzx.h		patch | blob | history
programs/imagex.c		patch | blob | history
src/compress.c		patch | blob | history
src/compress_parallel.c		patch | blob | history
src/compress_serial.c		patch | blob | history
src/decompress.c		patch | blob | history
src/divsufsort.c	[new file with mode: 0644]	patch | blob
src/lz_binary_trees.c	[moved from src/lz_bt.c with 57% similarity]	patch | blob | history
src/lz_brute_force.c	[new file with mode: 0644]	patch | blob
src/lz_hash.c	[deleted file]	patch | blob | history
src/lz_hash_chains.c	[new file with mode: 0644]	patch | blob
src/lz_lcp_interval_tree.c	[new file with mode: 0644]	patch | blob
src/lz_linked_suffix_array.c	[new file with mode: 0644]	patch | blob
src/lz_mf.c	[new file with mode: 0644]	patch | blob
src/lz_null.c	[new file with mode: 0644]	patch | blob
src/lz_suffix_array_utils.c	[new file with mode: 0644]	patch | blob
src/lzms-compress.c		patch | blob | history
src/lzms-decompress.c		patch | blob | history
src/lzx-compress.c		patch | blob | history
src/lzx-decompress.c		patch | blob | history
src/resource.c		patch | blob | history
src/wim.c		patch | blob | history
src/xpress-compress.c		patch | blob | history