* ::wimlib_progress_info.scan. */
WIMLIB_PROGRESS_MSG_SCAN_END,
- /**
- * File resources are currently being written to the WIM.
- * @p info will point to ::wimlib_progress_info.write_streams. */
+ /** File resources ("streams") are currently being written to the WIM.
+ * @p info will point to ::wimlib_progress_info.write_streams. */
WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
/**
/* N.B. I wanted these to be anonymous structs, but Doxygen won't
* document them if they aren't given a name... */
- /** Valid on messages ::WIMLIB_PROGRESS_MSG_WRITE_STREAMS. */
+ /** Valid on the message ::WIMLIB_PROGRESS_MSG_WRITE_STREAMS. This is
+ * the primary message for tracking the progress of writing a WIM file.
+ */
struct wimlib_progress_info_write_streams {
- /** Number of bytes that are going to be written for all the
- * streams combined. This is the amount in uncompressed data.
- * (The actual number of bytes will be less if the data is being
- * written compressed.) */
+ /** Total number of uncompressed bytes of stream data being
+ * written. This can be thought of as the total uncompressed
+ * size of the files being archived, with some caveats. WIM
+ * files use single-instance streams, so the size provided here
+ * only counts distinct streams, except for the following
+ * exception: the size provided here may include the sizes of
+ * all newly added (e.g. with wimlib_add_image()) streams,
+ * pending automatic de-duplication during the write operation
+ * itself. When each such stream de-duplication occurs, this
+ * number will be decreased by the size of the duplicate stream
+ * that need not be written.
+ *
+ * In the case of a wimlib_overwrite() that the library opted to
+ * perform in-place, both @p total_streams and @p total_bytes
+ * will only count the streams actually being written and not
+ * pre-existing streams in the WIM file. */
uint64_t total_bytes;
- /** Number of streams that are going to be written. */
+ /** Total number of streams being written. This can be thought
+ * of as the total number of files being archived, with some
+ * caveats. In general, a single file or directory may contain
+ * multiple data streams, each of which will be represented
+ * separately in this number. Furthermore, WIM files use
+ * single-instance streams, so the stream count provided here
+ * only counts distinct streams, except for the following
+ * exception: the stream count provided here may include newly
+ * added (e.g. with wimlib_add_image()) streams, pending
+ * automatic de-duplication during the write operation itself.
+ * When each such stream de-duplication occurs, this number will
+ * be decreased by 1 to account for the duplicate stream that
+ * need not be written. */
uint64_t total_streams;
- /** Number of uncompressed bytes that have been written so far.
- * Will be 0 initially, and equal to @p total_bytes at the end.
- * */
+ /** Number of uncompressed bytes of stream data that have been
+ * written so far. This number will be 0 initially, and will be
+ * equal to @p total_bytes at the end of the write operation.
+ * Note that @p total_bytes (but not @p completed_bytes) may
+ * decrease throughout the write operation due to the discovery
+ * of stream duplications. */
uint64_t completed_bytes;
- /** Number of streams that have been written. Will be 0
- * initially, and equal to @p total_streams at the end. */
+ /** Number of streams that have been written so far. This
+ * number will be 0 initially, and will be equal to @p
+ * total_streams at the end of the write operation. Note that
+ * @p total_streams (but not @p completed_streams) may decrease
+ * throughout the write operation due to the discovery of stream
+ * duplications.
+ *
+ * For applications that wish to calculate a simple "percent
+ * complete" for the write operation, it will likely be more
+ * accurate to calculate the percentage from @p completed_bytes
+ * and @p total_bytes rather than @p completed_streams and
+ * @p total_streams because the time for the operation to
+ * complete is mainly determined by the number of bytes that
+ * need to be read, compressed, and written, not just the number
+ * of files being archived. */
uint64_t completed_streams;
- /** Number of threads that are being used to compress resources
- * (if applicable). */
- unsigned num_threads;
+ /** Number of threads that are being used to compress streams,
+ * or 1 if streams are being written uncompressed. */
+ uint32_t num_threads;
- /** The compression type being used to write the streams; either
- * ::WIMLIB_COMPRESSION_TYPE_NONE,
- * ::WIMLIB_COMPRESSION_TYPE_XPRESS, or
- * ::WIMLIB_COMPRESSION_TYPE_LZX. */
- int compression_type;
+ /** The compression type being used to write the streams, as one
+ * of the ::wimlib_compression_type constants. */
+ int32_t compression_type;
/** Number of split WIM parts from which streams are being
* written (may be 0 if irrelevant). */
- unsigned total_parts;
+ uint32_t total_parts;
/** Number of split WIM parts from which streams have been
* written (may be 0 if irrelevant). */
- unsigned completed_parts;
+ uint32_t completed_parts;
} write_streams;
/** Valid on messages ::WIMLIB_PROGRESS_MSG_SCAN_BEGIN,
/**
* Similar to wimlib_extract_paths(), but the paths to extract from the WIM
- * image specified in the UTF-8 text file named by @p path_list_file which
+ * image are specified in the UTF-8 text file named by @p path_list_file which
* itself contains the list of paths to use, one per line. Leading and trailing
* whitespace, and otherwise empty lines and lines beginning with the ';'
* character are ignored. No quotes are needed as paths are otherwise delimited
* ::WIMStruct for a WIM.
* @param out_chunk_size
* The chunk size (in bytes) to set. The valid chunk sizes are dependent
- * on the compression format. The XPRESS compression format supports chunk
- * sizes that are powers of 2 with exponents between 15 and 26 inclusively,
- * whereas the LZX compression format supports chunk sizes that are powers
- * of 2 with exponents between 15 and 21 inclusively. As a special case,
- * if @p out_chunk_size is specified as 0, the chunk size is set to the
- * default for the currently selected output compression type.
+ * on the compression format. The XPRESS and LZMS compression formats
+ * support chunk sizes that are powers of 2 with exponents between 15 and
+ * 26 inclusively, whereas the LZX compression format supports chunk sizes
+ * that are powers of 2 with exponents between 15 and 21 inclusively. As a
+ * special case, if @p out_chunk_size is specified as 0, the chunk size is
+ * set to the default for the currently selected output compression type.
*
* @return 0 on success; nonzero on error.
*
* @retval ::WIMLIB_ERR_INVALID_CHUNK_SIZE
- * @p ctype is not a supported chunk size.
+ * @p chunk_size is not a supported chunk size for the currently selected
+ * output compression type.
*/
extern int
wimlib_set_output_chunk_size(WIMStruct *wim, uint32_t chunk_size);
uint32_t slow_reserved1 : 31;
- /** Matches with length (in bytes) longer than this
- * value are immediately taken without spending time on
- * minimum-cost measurements. Suggested value: 32. */
- uint32_t num_fast_bytes;
+ /** Matches with length (in bytes) greater than or equal
+ * to this value are immediately taken without spending
+ * time on minimum-cost measurements. Suggested value:
+ * 32. */
+ uint32_t nice_match_length;
/** Number of passes to compute a match/literal sequence
* for each LZX block. This is for an iterative
} alg_params;
};
+/** LZMS compression parameters that can optionally be passed to
+ * wimlib_create_compressor() with the compression type
+ * ::WIMLIB_COMPRESSION_TYPE_LZMS. */
+struct wimlib_lzms_compressor_params {
+ /** @c hdr.size must be set to the size of this structure, in bytes. */
+ struct wimlib_compressor_params_header hdr;
+
+ /** Minimum match length to output. This must be at least 2. Suggested
+ * value: 2. */
+ uint32_t min_match_length;
+
+ /** Maximum match length to output. This must be at least @p
+ * min_match_length. Suggested value: @c UINT32_MAX. */
+ uint32_t max_match_length;
+
+ /** Matches with length (in bytes) greater than or equal to this value
+ * are immediately taken without spending time on minimum-cost
+ * measurements. The minimum of @p max_match_length and @p
+ * nice_match_length may not exceed 65536. Suggested value: 32. */
+ uint32_t nice_match_length;
+
+ /** Maximum depth to search for matches at each position. Suggested
+ * value: 50. */
+ uint32_t max_search_depth;
+
+ /** Maximum number of potentially good matches to consider at each
+ * position. Suggested value: 3. */
+ uint32_t max_matches_per_pos;
+
+ /** Length of the array for the near-optimal LZ parsing algorithm. This
+ * must be at least 1. Suggested value: 1024. */
+ uint32_t optim_array_length;
+
+ uint64_t reserved2[4]; /* NOTE(review): undocumented; presumably reserved for future use -- confirm. */
+};
+
/** Opaque compressor handle. */
struct wimlib_compressor;
*
* @retval ::WIMLIB_ERR_INVALID_COMPRESSION_TYPE
* @p ctype was not a supported compression type.
+ * @retval ::WIMLIB_ERR_INVALID_PARAM
+ * @p params were invalid.
* @retval ::WIMLIB_ERR_NOMEM
* Not enough memory to duplicate the parameters (perhaps @c params->size
* was invalid).
wimlib_set_default_compressor_params(enum wimlib_compression_type ctype,
const struct wimlib_compressor_params_header *params);
+/**
+ * Returns the approximate number of bytes needed to allocate a compressor with
+ * wimlib_create_compressor() for the specified compression type, block size,
+ * and parameters.
+ *
+ * @param ctype
+ * The compression type for which to compute the memory requirement.
+ * @param max_block_size
+ * The block size for which to compute the memory requirement.
+ * @param params
+ * The compressor parameters to assume. May be @c NULL, in which case the
+ * current default parameters for @p ctype are used.
+ *
+ * @return The approximate number of bytes needed, or 0 if the compression
+ * type or parameters are invalid.
+ */
+extern uint64_t
+wimlib_get_compressor_needed_memory(enum wimlib_compression_type ctype,
+ size_t max_block_size,
+ const struct wimlib_compressor_params_header *params);
+
/**
* Allocate a compressor for the specified compression type using the specified
* parameters.