lz_optimal.h: Minor comment fixes

[wimlib] / include / wimlib.h
diff --git a/include/wimlib.h b/include/wimlib.h

index fcc88b028cf4c44b3a6a94d4a4effead6fae0e22..129a6c5a749a0e349b881fd0a5bd0ebe27b78870 100644 (file)
--- a/include/wimlib.h
+++ b/include/wimlib.h
@@ -491,9 +491,8 @@ enum wimlib_progress_msg {
          * ::wimlib_progress_info.scan. */
         WIMLIB_PROGRESS_MSG_SCAN_END,
  
-       /**
-        * File resources are currently being written to the WIM.
-        * @p info will point to ::wimlib_progress_info.write_streams. */
+       /** File resources ("streams") are currently being written to the WIM.
+        * @p info will point to ::wimlib_progress_info.write_streams.  */
         WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
  
         /**
@@ -561,43 +560,83 @@ union wimlib_progress_info {
         /* N.B. I wanted these to be anonymous structs, but Doxygen won't
          * document them if they aren't given a name... */
  
-       /** Valid on messages ::WIMLIB_PROGRESS_MSG_WRITE_STREAMS. */
+       /** Valid on the message ::WIMLIB_PROGRESS_MSG_WRITE_STREAMS.  This is
+        * the primary message for tracking the progress of writing a WIM file.
+        */
         struct wimlib_progress_info_write_streams {
-               /** Number of bytes that are going to be written for all the
-                * streams combined.  This is the amount in uncompressed data.
-                * (The actual number of bytes will be less if the data is being
-                * written compressed.) */
+               /** Total number of uncompressed bytes of stream data being
+                * written.  This can be thought of as the total uncompressed
+                * size of the files being archived, with some caveats.  WIM
+                * files use single-instance streams, so the size provided here
+                * only counts distinct streams, except for the following
+                * exception: the size provided here may include the sizes of
+                * all newly added (e.g. with wimlib_add_image()) streams,
+                * pending automatic de-duplication during the write operation
+                * itself.  When each such stream de-duplication occurs, this
+                * number will be decreased by the size of the duplicate stream
+                * that need not be written.
+                *
+                * In the case of a wimlib_overwrite() that the library opted to
+                * perform in-place, both @p total_streams and @p total_bytes
+                * will only count the streams actually being written and not
+                * pre-existing streams in the WIM file.  */
                 uint64_t total_bytes;
  
-               /** Number of streams that are going to be written. */
+               /** Total number of streams being written.  This can be thought
+                * of as the total number of files being archived, with some
+                * caveats.  In general, a single file or directory may contain
+                * multiple data streams, each of which will be represented
+                * separately in this number.  Furthermore, WIM files use
+                * single-instance streams, so the stream count provided here
+                * only counts distinct streams, except for the following
+                * exception: the stream count provided here may include newly
+                * added (e.g. with wimlib_add_image()) streams, pending
+                * automatic de-duplication during the write operation itself.
+                * When each such stream de-duplication occurs, this number will
+                * be decreased by 1 to account for the duplicate stream that
+                * need not be written.  */
                 uint64_t total_streams;
  
-               /** Number of uncompressed bytes that have been written so far.
-                * Will be 0 initially, and equal to @p total_bytes at the end.
-                * */
+               /** Number of uncompressed bytes of stream data that have been
+                * written so far.  This number will be 0 initially, and will be
+                * equal to @p total_bytes at the end of the write operation.
+                * Note that @p total_bytes (but not @p completed_bytes) may
+                * decrease throughout the write operation due to the discovery
+                * of stream duplications.  */
                 uint64_t completed_bytes;
  
-               /** Number of streams that have been written.  Will be 0
-                * initially, and equal to @p total_streams at the end. */
+               /** Number of streams that have been written so far.  This
+                * number will be 0 initially, and will be equal to @p
+                * total_streams at the end of the write operation.  Note that
+                * @p total_streams (but not @p completed_streams) may decrease
+                * throughout the write operation due to the discovery of stream
+                * duplications.
+                *
+                * For applications that wish to calculate a simple "percent
+                * complete" for the write operation, it will likely be more
+                * accurate to calculate the percentage from @p completed_bytes
+                * and @p total_bytes rather than @p completed_streams and
+                * @p total_streams because the time for the operation to
+                * complete is mainly determined by the number of bytes that
+                * need to be read, compressed, and written, not just the number
+                * of files being archived.  */
                 uint64_t completed_streams;
  
-               /** Number of threads that are being used to compress resources
-                * (if applicable).  */
-               unsigned num_threads;
+               /** Number of threads that are being used to compress streams,
+                * or 1 if streams are being written uncompressed.  */
+               uint32_t num_threads;
  
-               /** The compression type being used to write the streams; either
-                * ::WIMLIB_COMPRESSION_TYPE_NONE,
-                * ::WIMLIB_COMPRESSION_TYPE_XPRESS, or
-                * ::WIMLIB_COMPRESSION_TYPE_LZX. */
-               int      compression_type;
+               /** The compression type being used to write the streams, as one
+                * of the ::wimlib_compression_type constants.  */
+               int32_t  compression_type;
  
                 /** Number of split WIM parts from which streams are being
                  * written (may be 0 if irrelevant).  */
-               unsigned total_parts;
+               uint32_t total_parts;
  
                 /** Number of split WIM parts from which streams have been
                  * written (may be 0 if irrelevant).  */
-               unsigned completed_parts;
+               uint32_t completed_parts;
         } write_streams;
  
         /** Valid on messages ::WIMLIB_PROGRESS_MSG_SCAN_BEGIN,
@@ -2432,7 +2471,7 @@ wimlib_extract_image_from_pipe(int pipe_fd,
  
  /**
   * Similar to wimlib_extract_paths(), but the paths to extract from the WIM
- * image specified in the UTF-8 text file named by @p path_list_file which
+ * image are specified in the UTF-8 text file named by @p path_list_file which
   * itself contains the list of paths to use, one per line.  Leading and trailing
   * whitespace, and otherwise empty lines and lines beginning with the ';'
   * character are ignored.  No quotes are needed as paths are otherwise delimited
@@ -3357,17 +3396,18 @@ wimlib_set_image_descripton(WIMStruct *wim, int image,
   *     ::WIMStruct for a WIM.
   * @param out_chunk_size
   *     The chunk size (in bytes) to set.  The valid chunk sizes are dependent
- *     on the compression format.  The XPRESS compression format supports chunk
- *     sizes that are powers of 2 with exponents between 15 and 26 inclusively,
- *     whereas the LZX compression format supports chunk sizes that are powers
- *     of 2 with exponents between 15 and 21 inclusively.  As a special case,
- *     if @p out_chunk_size is specified as 0, the chunk size is set to the
- *     default for the currently selected output compression type.
+ *     on the compression format.  The XPRESS and LZMS compression formats
+ *     support chunk sizes that are powers of 2 with exponents between 15 and
+ *     26 inclusively, whereas the LZX compression format supports chunk sizes
+ *     that are powers of 2 with exponents between 15 and 21 inclusively.  As a
+ *     special case, if @p out_chunk_size is specified as 0, the chunk size is
+ *     set to the default for the currently selected output compression type.
   *
   * @return 0 on success; nonzero on error.
   *
   * @retval ::WIMLIB_ERR_INVALID_CHUNK_SIZE
- *     @p ctype is not a supported chunk size.
+ *     @p chunk_size is not a supported chunk size for the currently selected
+ *     output compression type.
   */
  extern int
  wimlib_set_output_chunk_size(WIMStruct *wim, uint32_t chunk_size);
@@ -3974,10 +4014,11 @@ struct wimlib_lzx_compressor_params {
  
                         uint32_t slow_reserved1 : 31;
  
-                       /** Matches with length (in bytes) longer than this
-                        * value are immediately taken without spending time on
-                        * minimum-cost measurements.  Suggested value: 32.  */
-                       uint32_t num_fast_bytes;
+                       /** Matches with length (in bytes) greater than or equal
+                        * to this value are immediately taken without spending
+                        * time on minimum-cost measurements.  Suggested value:
+                        * 32.  */
+                       uint32_t nice_match_length;
  
                         /** Number of passes to compute a match/literal sequence
                          * for each LZX block.  This is for an iterative
@@ -4020,6 +4061,42 @@ struct wimlib_lzx_compressor_params {
         } alg_params;
  };
  
+/** LZMS compression parameters that can optionally be passed to
+ * wimlib_create_compressor() with the compression type
+ * ::WIMLIB_COMPRESSION_TYPE_LZMS.  */
+struct wimlib_lzms_compressor_params {
+       /** hdr.size must be set to the size of this structure, in bytes.  */
+       struct wimlib_compressor_params_header hdr;
+
+       /** Minimum match length to output.  This must be at least 2.  Suggested
+        * value: 2.  */
+       uint32_t min_match_length;
+
+       /** Maximum match length to output.  This must be at least @p
+        * min_match_length.  Suggested value: @p UINT32_MAX.  */
+       uint32_t max_match_length;
+
+       /** Matches with length (in bytes) greater than or equal to this value
+        * are immediately taken without spending time on minimum-cost
+        * measurements.  The minimum of @p max_match_length and @p
+        * nice_match_length may not exceed 65536.  Suggested value: 32.  */
+       uint32_t nice_match_length;
+
+       /** Maximum depth to search for matches at each position.  Suggested
+        * value: 50.  */
+       uint32_t max_search_depth;
+
+       /** Maximum number of potentially good matches to consider at each
+        * position.  Suggested value: 3.  */
+       uint32_t max_matches_per_pos;
+
+       /** Length of the array for the near-optimal LZ parsing algorithm.  This
+        * must be at least 1.  Suggested value: 1024.  */
+       uint32_t optim_array_length;
+
+       uint64_t reserved2[4];
+};
+
  /** Opaque compressor handle.  */
  struct wimlib_compressor;
  
@@ -4041,6 +4118,8 @@ struct wimlib_decompressor;
   *
   * @retval ::WIMLIB_ERR_INVALID_COMPRESSION_TYPE
   *     @p ctype was not a supported compression type.
+ * @retval ::WIMLIB_ERR_INVALID_PARAM
+ *     @p params were invalid.
   * @retval ::WIMLIB_ERR_NOMEM
   *     Not enough memory to duplicate the parameters (perhaps @c params->size
   *     was invalid).
@@ -4049,6 +4128,18 @@ extern int
  wimlib_set_default_compressor_params(enum wimlib_compression_type ctype,
                                      const struct wimlib_compressor_params_header *params);
  
+/**
+ * Returns the approximate number of bytes needed to allocate a compressor with
+ * wimlib_create_compressor() for the specified compression type, block size,
+ * and parameters.  @p params may be @c NULL, in which case the current default
+ * parameters for @p ctype are used.  Returns 0 if the compression type or
+ * parameters are invalid.
+ */
+extern uint64_t
+wimlib_get_compressor_needed_memory(enum wimlib_compression_type ctype,
+                                   size_t max_block_size,
+                                   const struct wimlib_compressor_params_header *params);
+
  /**
   * Allocate a compressor for the specified compression type using the specified
   * parameters.