/**
* @ingroup G_writing_and_overwriting_wims
*
- * Set the compression type of a WIM to use in subsequent calls to
+ * Set the compression chunk size of a WIM to use in subsequent calls to
* wimlib_write() or wimlib_overwrite().
*
+ * @param wim
+ * ::WIMStruct for a WIM.
+ * @param chunk_size
+ * The chunk size (in bytes) to set. The valid chunk sizes are dependent
+ * on the compression format. The XPRESS compression format supports chunk
+ * sizes that are powers of 2 with exponents between 15 and 26 inclusive,
+ * whereas the LZX compression format currently only supports a chunk size
+ * of 32768.
+ *
* @return 0 on success; nonzero on error.
*
+ * @retval ::WIMLIB_ERR_INVALID_CHUNK_SIZE
+ * @p chunk_size is not a supported chunk size.
+ */
+extern int
+wimlib_set_output_chunk_size(WIMStruct *wim, uint32_t chunk_size);
+
+/**
+ * @ingroup G_writing_and_overwriting_wims
+ *
+ * Set the compression type of a WIM to use in subsequent calls to
+ * wimlib_write() or wimlib_overwrite().
+ *
* @param wim
* ::WIMStruct for a WIM.
* @param ctype
- * The compression type to set (one of ::wimlib_compression_type).
+ * The compression type to set (one of ::wimlib_compression_type). If this
+ * compression type is incompatible with the current output chunk size
+ * (either the default or as set with wimlib_set_output_chunk_size()), the
+ * output chunk size is reset to the default for that compression type.
*
* @return 0 on success; nonzero on error.
*
/**
* @ingroup G_compression
*
- * This function is equivalent to wimlib_lzx_compress(), but instead compresses
- * the data using "XPRESS" compression.
+ * Compress a chunk of data using XPRESS compression.
+ *
+ * This function is exported for convenience only and should only be used by
+ * library clients looking to make use of wimlib's compression code for another
+ * purpose.
+ *
+ * As of wimlib v1.5.4, this function can be used with @p chunk_size greater
+ * than 32768 bytes and is only limited by available memory. However, the
+ * XPRESS format itself still caps match offsets to 65535, so if a larger chunk
+ * size is chosen, then the matching will effectively occur in a sliding window
+ * over it.
+ *
+ * @param chunk
+ * Uncompressed data of the chunk.
+ * @param chunk_size
+ * Size of the uncompressed chunk, in bytes.
+ * @param out
+ * Pointer to output buffer of size at least (@p chunk_size - 1) bytes.
+ *
+ * @return
+ * The size of the compressed data written to @p out in bytes, or 0 if the
+ * data could not be compressed to (@p chunk_size - 1) bytes or fewer.
*/
extern unsigned
wimlib_xpress_compress(const void *chunk, unsigned chunk_size, void *out);
/**
* @ingroup G_compression
*
- * This function is equivalent to wimlib_lzx_decompress(), but instead assumes
- * the data is compressed using "XPRESS" compression.
+ * Decompresses a chunk of XPRESS-compressed data.
+ *
+ * This function is exported for convenience only and should only be used by
+ * library clients looking to make use of wimlib's compression code for another
+ * purpose.
+ *
+ * @param compressed_data
+ * Pointer to the compressed data.
+ *
+ * @param compressed_len
+ * Length of the compressed data, in bytes.
+ *
+ * @param uncompressed_data
+ * Pointer to the buffer into which to write the uncompressed data.
+ *
+ * @param uncompressed_len
+ * Length of the uncompressed data.
+ *
+ * @return
+ * 0 on success; non-zero on failure.
*/
extern int
wimlib_xpress_decompress(const void *compressed_data, unsigned compressed_len,
struct lz_params {
unsigned min_match;
unsigned max_match;
+ unsigned max_offset;
unsigned nice_match;
unsigned good_match;
unsigned max_chain_len;
lz_record_match_t record_match,
lz_record_literal_t record_literal,
void *record_ctx,
- const struct lz_params *params);
+ const struct lz_params *params,
+ input_idx_t prev_tab[]);
extern void
make_canonical_huffman_code(unsigned num_syms,
/* Length of the WIM header on disk. */
#define WIM_HEADER_DISK_SIZE 208
-/* Compressed resources in the WIM are divided into separated compressed chunks
- * of this size. This value is unfortunately not configurable (at least when
- * compatibility with Microsoft's software is desired). */
-#define WIM_CHUNK_SIZE 32768
-
/* Version of the WIM file. There is an older version (used for prerelease
* versions of Windows Vista), but wimlib doesn't support it. The differences
* between the versions are undocumented. */
/* Flags for the WIM file (WIM_HDR_FLAG_*) */
u32 wim_flags;
- /* Uncompressed chunk size of resources in the WIM. 0 if the WIM is
- * uncompressed. If compressed, WIM_CHUNK_SIZE is expected (currently
- * the only supported value). */
+ /* Chunk size for compressed resources in the WIM, or 0 if the WIM is
+ * uncompressed. */
u32 chunk_size;
/* Globally unique identifier for the WIM file. Basically a bunch of
/* Bitwise OR of one or more of the WIM_HDR_FLAG_* defined below. */
u32 flags;
+ /* Compressed resource chunk size */
+ u32 chunk_size;
+
/* A unique identifier for the WIM file. */
u8 guid[WIM_GID_LEN];
return lte->resource_entry.original_size;
}
+static inline u32
+wim_resource_chunk_size(const struct wim_lookup_table_entry * lte)
+{
+ if (lte->resource_location == RESOURCE_IN_WIM &&
+ lte->compression_type != WIMLIB_COMPRESSION_TYPE_NONE)
+ return lte->wim->chunk_size;
+ else
+ return 32768;
+}
+
+
static inline u64
wim_resource_chunks(const struct wim_lookup_table_entry *lte)
{
- return DIV_ROUND_UP(wim_resource_size(lte), WIM_CHUNK_SIZE);
+ return DIV_ROUND_UP(wim_resource_size(lte), wim_resource_chunk_size(lte));
}
static inline int
wim_resource_compression_type(const struct wim_lookup_table_entry *lte)
{
- BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
return lte->compression_type;
}
read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte,
u64 size,
consume_data_callback_t cb,
+ u32 in_chunk_size,
void *ctx_or_buf,
int _ignored_flags);
extern int
read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
u64 size, consume_data_callback_t cb,
- void *ctx_or_buf, int flags, u64 offset);
+ u32 in_chunk_size, void *ctx_or_buf,
+ int flags, u64 offset);
extern int
read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
extern int
read_resource_prefix(const struct wim_lookup_table_entry *lte,
- u64 size, consume_data_callback_t cb, void *ctx_or_buf,
- int flags);
+ u64 size, consume_data_callback_t cb,
+ u32 in_chunk_size, void *ctx_or_buf, int flags);
/* Functions to write a resource. */
extern int
write_wim_resource(struct wim_lookup_table_entry *lte, struct filedes *out_fd,
- int out_ctype, struct resource_entry *out_res_entry,
+ int out_ctype,
+ u32 out_chunk_size,
+ struct resource_entry *out_res_entry,
int write_resource_flags,
struct wimlib_lzx_context **comp_ctx);
write_wim_resource_from_buffer(const void *buf, size_t buf_size,
int reshdr_flags, struct filedes *out_fd,
int out_ctype,
+ u32 out_chunk_size,
struct resource_entry *out_res_entry,
u8 *hash_ret, int write_resource_flags,
struct wimlib_lzx_context **comp_ctx);
#endif
}
+static inline bool
+is_power_of_2(unsigned long n)
+{
+ return (n != 0 && (n & (n - 1)) == 0);
+
+}
+
static inline u64
hash_u64(u64 n)
{
* wimlib_set_output_compression_type(); otherwise is the same as
* compression_type. */
u8 out_compression_type : 2;
+
+ /* Uncompressed size of compressed chunks in this WIM (cached from
+ * header). */
+ u32 chunk_size;
+
+ /* Overridden chunk size for wimlib_overwrite() or wimlib_write(). Can
+ * be changed by wimlib_set_output_chunk_size(); otherwise is the same
+ * as chunk_size. */
+ u32 out_chunk_size;
};
static inline bool wim_is_pipable(const WIMStruct *wim)
get_wim_hdr_cflags(int ctype);
extern int
-init_wim_header(struct wim_header *hdr, int ctype);
+init_wim_header(struct wim_header *hdr, int ctype, u32 chunk_size);
extern int
read_wim_header(const tchar *filename, struct filedes *in_fd,
read_win32_file_prefix(const struct wim_lookup_table_entry *lte,
u64 size,
consume_data_callback_t cb,
+ u32 in_chunk_size,
void *ctx_or_buf,
int _ignored_flags);
read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte,
u64 size,
consume_data_callback_t cb,
+ u32 in_chunk_size,
void *ctx_or_buf,
int _ignored_flags);
IMAGEX_ALLOW_OTHER_OPTION,
IMAGEX_BOOT_OPTION,
IMAGEX_CHECK_OPTION,
+ IMAGEX_CHUNK_SIZE_OPTION,
IMAGEX_COMMAND_OPTION,
IMAGEX_COMMIT_OPTION,
IMAGEX_COMPRESS_OPTION,
{T("nocheck"), no_argument, NULL, IMAGEX_NOCHECK_OPTION},
{T("compress"), required_argument, NULL, IMAGEX_COMPRESS_OPTION},
{T("compress-slow"), no_argument, NULL, IMAGEX_COMPRESS_SLOW_OPTION},
+ {T("chunk-size"), required_argument, NULL, IMAGEX_CHUNK_SIZE_OPTION},
{T("config"), required_argument, NULL, IMAGEX_CONFIG_OPTION},
{T("dereference"), no_argument, NULL, IMAGEX_DEREFERENCE_OPTION},
{T("flags"), required_argument, NULL, IMAGEX_FLAGS_OPTION},
{T("recompress"), no_argument, NULL, IMAGEX_RECOMPRESS_OPTION},
{T("compress-slow"), no_argument, NULL, IMAGEX_COMPRESS_SLOW_OPTION},
{T("recompress-slow"), no_argument, NULL, IMAGEX_COMPRESS_SLOW_OPTION},
+ {T("chunk-size"), required_argument, NULL, IMAGEX_CHUNK_SIZE_OPTION},
{T("threads"), required_argument, NULL, IMAGEX_THREADS_OPTION},
{T("pipable"), no_argument, NULL, IMAGEX_PIPABLE_OPTION},
{T("not-pipable"), no_argument, NULL, IMAGEX_NOT_PIPABLE_OPTION},
}
}
+static uint32_t parse_chunk_size(const char *optarg)
+{
+ char *tmp;
+ unsigned long chunk_size = strtoul(optarg, &tmp, 10);
+ if (chunk_size >= UINT32_MAX || *tmp || tmp == optarg) {
+ imagex_error(T("Chunk size must be a non-negative integer!"));
+ return UINT32_MAX;
+ } else {
+ return chunk_size;
+ }
+}
+
+
/*
* Parse an option passed to an update command.
*
WIMLIB_ADD_IMAGE_FLAG_WINCONFIG;
int write_flags = 0;
int compression_type = WIMLIB_COMPRESSION_TYPE_INVALID;
+ uint32_t chunk_size = UINT32_MAX;
const tchar *wimfile;
int wim_fd;
const tchar *name;
goto out_err;
compression_type = WIMLIB_COMPRESSION_TYPE_LZX;
break;
+ case IMAGEX_CHUNK_SIZE_OPTION:
+ chunk_size = parse_chunk_size(optarg);
+ if (chunk_size == UINT32_MAX)
+ goto out_err;
+ break;
case IMAGEX_FLAGS_OPTION:
flags_element = optarg;
break;
if (ret)
goto out_free_config;
+ /* Set chunk size if non-default. */
+ if (chunk_size != UINT32_MAX) {
+ ret = wimlib_set_output_chunk_size(wim, chunk_size);
+ if (ret)
+ goto out_free_wim;
+ }
+
#ifndef __WIN32__
/* Detect if source is regular file or block device and set NTFS volume
* capture mode. */
ret = wimlib_create_new_wim(compression_type, &dest_wim);
if (ret)
goto out_free_src_wim;
+
+ wimlib_set_output_chunk_size(dest_wim, src_info.chunk_size);
}
image = wimlib_resolve_image(src_wim, src_image_num_or_name);
tprintf(T("Image Count: %d\n"), info->image_count);
tprintf(T("Compression: %"TS"\n"),
wimlib_get_compression_type_string(info->compression_type));
+ tprintf(T("Chunk Size: %"PRIu32" bytes\n"),
+ info->chunk_size);
tprintf(T("Part Number: %d/%d\n"), info->part_number, info->total_parts);
tprintf(T("Boot Index: %d\n"), info->boot_index);
tprintf(T("Size: %"PRIu64" bytes\n"), info->total_bytes);
int open_flags = WIMLIB_OPEN_FLAG_WRITE_ACCESS;
int write_flags = WIMLIB_WRITE_FLAG_REBUILD;
int compression_type = WIMLIB_COMPRESSION_TYPE_INVALID;
+ uint32_t chunk_size = UINT32_MAX;
int ret;
WIMStruct *wim;
const tchar *wimfile;
if (ret)
goto out_err;
break;
+ case IMAGEX_CHUNK_SIZE_OPTION:
+ chunk_size = parse_chunk_size(optarg);
+ if (chunk_size == UINT32_MAX)
+ goto out_err;
+ break;
case IMAGEX_THREADS_OPTION:
num_threads = parse_num_threads(optarg);
if (num_threads == UINT_MAX)
goto out;
if (compression_type != WIMLIB_COMPRESSION_TYPE_INVALID) {
+ /* Change compression type. */
ret = wimlib_set_output_compression_type(wim, compression_type);
if (ret)
goto out_wimlib_free;
}
+ if (chunk_size != UINT32_MAX) {
+ /* Change chunk size. */
+ ret = wimlib_set_output_chunk_size(wim, chunk_size);
+ if (ret)
+ goto out_wimlib_free;
+ }
+
old_size = file_get_size(wimfile);
tprintf(T("\"%"TS"\" original size: "), wimfile);
if (old_size == -1)
static int
skip_pwm_stream(struct wim_lookup_table_entry *lte)
{
- return read_partial_wim_resource(lte, wim_resource_size(lte),
- NULL, NULL,
+ return read_partial_wim_resource(lte,
+ wim_resource_size(lte),
+ NULL,
+ wim_resource_chunk_size(lte),
+ NULL,
WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY,
0);
}
}
hdr->flags = le32_to_cpu(disk_hdr.wim_flags);
- if (le32_to_cpu(disk_hdr.chunk_size) != WIM_CHUNK_SIZE &&
- (hdr->flags & WIM_HDR_FLAG_COMPRESSION)) {
- ERROR("\"%"TS"\": Unexpected chunk size of %u! Ask the author to "
- "implement support for other chunk sizes.",
- filename, le32_to_cpu(disk_hdr.chunk_size));
- ERROR("(Or it might just be that the WIM header is invalid.)");
- return WIMLIB_ERR_INVALID_CHUNK_SIZE;
- }
+
+ hdr->chunk_size = le32_to_cpu(disk_hdr.chunk_size);
memcpy(hdr->guid, disk_hdr.guid, WIM_GID_LEN);
disk_hdr.hdr_size = cpu_to_le32(sizeof(struct wim_header_disk));
disk_hdr.wim_version = cpu_to_le32(WIM_VERSION);
disk_hdr.wim_flags = cpu_to_le32(hdr->flags);
- disk_hdr.chunk_size = cpu_to_le32((hdr->flags & WIM_HDR_FLAG_COMPRESSION) ?
- WIM_CHUNK_SIZE : 0);
+ if (hdr->flags & WIM_HDR_FLAG_COMPRESSION)
+ disk_hdr.chunk_size = cpu_to_le32(hdr->chunk_size);
+ else
+ disk_hdr.chunk_size = 0;
memcpy(disk_hdr.guid, hdr->guid, WIM_GID_LEN);
disk_hdr.part_number = cpu_to_le16(hdr->part_number);
* Initializes the header for a WIM file.
*/
int
-init_wim_header(struct wim_header *hdr, int ctype)
+init_wim_header(struct wim_header *hdr, int ctype, u32 chunk_size)
{
memset(hdr, 0, sizeof(struct wim_header));
hdr->flags = get_wim_hdr_cflags(ctype);
ERROR("Invalid compression type specified (%d)", ctype);
return WIMLIB_ERR_INVALID_COMPRESSION_TYPE;
}
+ hdr->chunk_size = chunk_size;
hdr->total_parts = 1;
hdr->part_number = 1;
randomize_byte_array(hdr->guid, sizeof(hdr->guid));
if (hdr_flags[i].flag & hdr->flags)
tprintf(T(" WIM_HDR_FLAG_%s is set\n"), hdr_flags[i].name);
- tprintf(T("Chunk Size = %u\n"), WIM_CHUNK_SIZE);
+ tprintf(T("Chunk Size = %u\n"), wim->hdr.chunk_size);
tfputs (T("GUID = "), stdout);
print_byte_field(hdr->guid, WIM_GID_LEN, stdout);
tputchar(T('\n'));
0,
&wim->out_fd,
WIMLIB_COMPRESSION_TYPE_NONE,
+ 0,
&wim->hdr.integrity,
NULL,
0,
lte = CALLOC(1, sizeof(struct wim_lookup_table_entry));
if (lte) {
- lte->part_number = 1;
- lte->refcnt = 1;
+ lte->part_number = 1;
+ lte->refcnt = 1;
+ BUILD_BUG_ON(RESOURCE_NONEXISTENT != 0);
+ BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
} else {
ERROR("Out of memory (tried to allocate %zu bytes for "
"lookup table entry)",
WIM_RESHDR_FLAG_METADATA,
out_fd,
WIMLIB_COMPRESSION_TYPE_NONE,
+ 0,
out_res_entry,
NULL,
write_resource_flags,
* @params: Parameters that affect how long the search will proceed
* before going with the best that has been found
* so far.
+ * @min_start_pos: If the chain reaches a match starting before this
+ * position (including the end-of-chain 0), the search will
+ * be terminated.
*
* Returns the length of the match that was found.
*/
unsigned strstart, const input_idx_t prev_tab[],
unsigned cur_match, unsigned prev_len,
unsigned *match_start_ret,
- const struct lz_params *params)
+ const struct lz_params *params,
+ unsigned min_start_pos)
{
unsigned chain_len = params->max_chain_len;
* performance reasons. Therefore uninitialized memory will be
* accessed, and conditional jumps will be made that depend on
* those values. However the length of the match is limited to
- * the lookahead, so the output of deflate is not affected by
- * the uninitialized values.
- */
+ * the lookahead, so the output of lz_analyze_block() is not
+ * affected by the uninitialized values. */
if (match[best_len] != scan_end
|| match[best_len - 1] != scan_end1
scan_end1 = scan[best_len - 1];
scan_end = scan[best_len];
}
- } while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) != 0);
+ } while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) >= min_start_pos);
*match_start_ret = match_start;
return min(min(best_len, bytes_remaining), params->max_match);
}
* @params: Structure that contains parameters that affect how the
* analysis proceeds (mainly how good the matches
* have to be).
+ * @prev_tab: Temporary space containing at least @window_size elements.
*/
void
lz_analyze_block(const u8 window[],
lz_record_match_t record_match,
lz_record_literal_t record_literal,
void *record_ctx,
- const struct lz_params *params)
+ const struct lz_params *params,
+ input_idx_t prev_tab[])
{
unsigned cur_input_pos = 0;
unsigned hash = 0;
unsigned match_start = 0;
bool match_available = false;
input_idx_t hash_tab[HASH_SIZE];
- input_idx_t prev_tab[window_size];
+ unsigned min_start_pos = 1;
ZERO_ARRAY(hash_tab);
prev_start = match_start;
match_len = params->min_match - 1;
- if (hash_head != 0 && prev_len < params->max_lazy_match) {
+ if (cur_input_pos > params->max_offset)
+ min_start_pos = cur_input_pos - params->max_offset;
+ else
+ min_start_pos = 1;
+
+ if (hash_head >= min_start_pos &&
+ prev_len < params->max_lazy_match)
+ {
/* To simplify the code, we prevent matches with the
* string of window index 0 (in particular we have to
* avoid a match of the string with itself at the start
window_size - cur_input_pos,
cur_input_pos, prev_tab,
hash_head, prev_len,
- &match_start, params);
+ &match_start, params,
+ min_start_pos);
if (match_len == params->min_match &&
cur_input_pos - match_start > params->too_far)
* aren't worth choosing when using greedy or lazy parsing. */
.min_match = 3,
.max_match = LZX_MAX_MATCH_LEN,
+ .max_offset = 32768,
.good_match = LZX_MAX_MATCH_LEN,
.nice_match = LZX_MAX_MATCH_LEN,
.max_chain_len = LZX_MAX_MATCH_LEN,
record_ctx.matches = ctx->chosen_matches;
/* Determine series of matches/literals to output. */
- lz_analyze_block(ctx->window,
- ctx->window_size,
- lzx_record_match,
- lzx_record_literal,
- &record_ctx,
- &lzx_lz_params);
+ {
+ input_idx_t prev_tab[ctx->window_size];
+ lz_analyze_block(ctx->window,
+ ctx->window_size,
+ lzx_record_match,
+ lzx_record_literal,
+ &record_ctx,
+ &lzx_lz_params,
+ prev_tab);
+ }
/* Set up block specification. */
ret = write_wim_resource_from_buffer(buf, len, WIM_RESHDR_FLAG_METADATA,
&wim->out_fd,
wim->out_compression_type,
+ wim->out_chunk_size,
&imd->metadata_lte->output_resource_entry,
imd->metadata_lte->hash,
write_resource_flags,
read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte,
u64 size,
consume_data_callback_t cb,
+ u32 in_chunk_size,
void *ctx_or_buf,
int _ignored_flags)
{
s64 pos;
s64 bytes_remaining;
void *out_buf;
+ bool out_buf_malloced;
int ret;
+ size_t stack_max = 32768;
ni = ntfs_pathname_to_inode(vol, NULL, loc->path);
if (!ni) {
goto out_close_ntfs_inode;
}
- if (cb)
- out_buf = alloca(WIM_CHUNK_SIZE);
- else
+ out_buf_malloced = false;
+ if (cb) {
+ if (in_chunk_size <= stack_max) {
+ out_buf = alloca(in_chunk_size);
+ } else {
+ out_buf = MALLOC(in_chunk_size);
+ if (out_buf == NULL) {
+ ret = WIMLIB_ERR_NOMEM;
+ goto out_close_ntfs_attr;
+ }
+ out_buf_malloced = true;
+ }
+ } else {
out_buf = ctx_or_buf;
+ }
pos = (loc->is_reparse_point) ? 8 : 0;
bytes_remaining = size;
while (bytes_remaining) {
- s64 to_read = min(bytes_remaining, WIM_CHUNK_SIZE);
+ s64 to_read = min(bytes_remaining, in_chunk_size);
if (ntfs_attr_pread(na, pos, to_read, out_buf) != to_read) {
ERROR_WITH_ERRNO("Error reading \"%"TS"\"", loc->path);
ret = WIMLIB_ERR_NTFS_3G;
- goto out_close_ntfs_attr;
+ goto out_free_memory;
}
pos += to_read;
bytes_remaining -= to_read;
if (cb) {
ret = cb(out_buf, to_read, ctx_or_buf);
if (ret)
- goto out_close_ntfs_attr;
+ goto out_free_memory;
} else {
out_buf += to_read;
}
}
ret = 0;
+out_free_memory:
+ if (out_buf_malloced)
+ FREE(out_buf);
out_close_ntfs_attr:
ntfs_attr_close(na);
out_close_ntfs_inode:
static decompress_func_t
get_decompress_func(int ctype)
{
- if (ctype == WIMLIB_COMPRESSION_TYPE_LZX)
+ switch (ctype) {
+ case WIMLIB_COMPRESSION_TYPE_LZX:
return wimlib_lzx_decompress;
- else
+ case WIMLIB_COMPRESSION_TYPE_XPRESS:
return wimlib_xpress_decompress;
+ default:
+ wimlib_assert(0);
+ return NULL;
+ }
}
/*
* stream and chunk headers.
*/
static int
-read_compressed_resource(const struct wim_lookup_table_entry *lte,
- u64 size, consume_data_callback_t cb,
- void *ctx_or_buf, int flags, u64 offset)
+read_compressed_resource(const struct wim_lookup_table_entry * const lte,
+ u64 size, const consume_data_callback_t cb,
+ const u32 in_chunk_size, void * const ctx_or_buf,
+ const int flags, const u64 offset)
{
int ret;
+ const u32 orig_chunk_size = wim_resource_chunk_size(lte);
+ const u32 orig_chunk_order = bsr32(orig_chunk_size);
+
+ wimlib_assert(is_power_of_2(orig_chunk_size));
+ wimlib_assert(cb == NULL || is_power_of_2(in_chunk_size));
+
/* Currently, reading raw compressed chunks is only guaranteed to work
* correctly when the full resource is requested. Furthermore, in such
* cases the requested size is specified as the compressed size, but
if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
wimlib_assert(offset == 0);
wimlib_assert(size == lte->resource_entry.size);
+ wimlib_assert(wim_resource_chunk_size(lte) == in_chunk_size);
size = wim_resource_size(lte);
}
if (size == 0)
return 0;
+ u64 *chunk_offsets = NULL;
+ u8 *out_buf = NULL;
+ u8 *tmp_buf = NULL;
+ void *compressed_buf = NULL;
+ bool chunk_offsets_malloced = false;
+ bool out_buf_malloced = false;
+ bool tmp_buf_malloced = false;
+ bool compressed_buf_malloced = false;
+ const size_t stack_max = 32768;
+
/* Get the appropriate decompression function. */
- decompress_func_t decompress =
+ const decompress_func_t decompress =
get_decompress_func(wim_resource_compression_type(lte));
/* Get the file descriptor for the WIM. */
- struct filedes *in_fd = <e->wim->in_fd;
+ struct filedes * const in_fd = <e->wim->in_fd;
/* Calculate the number of chunks the resource is divided into. */
- u64 num_chunks = wim_resource_chunks(lte);
+ const u64 num_chunks = wim_resource_chunks(lte);
/* Calculate the number of entries in the chunk table; it's one less
* than the number of chunks, since the first chunk has no entry. */
- u64 num_chunk_entries = num_chunks - 1;
+ const u64 num_chunk_entries = num_chunks - 1;
/* Calculate the 0-based index of the chunk at which the read starts.
*/
- u64 start_chunk = offset / WIM_CHUNK_SIZE;
+ const u64 start_chunk = offset >> orig_chunk_order;
/* Calculate the offset, within the start chunk, of the first byte of
* the read. */
- u64 start_offset_in_chunk = offset % WIM_CHUNK_SIZE;
+ const u32 start_offset_in_chunk = offset & (orig_chunk_size - 1);
/* Calculate the index of the chunk that contains the last byte of the
* read. */
- u64 end_chunk = (offset + size - 1) / WIM_CHUNK_SIZE;
+ const u64 end_chunk = (offset + size - 1) >> orig_chunk_order;
/* Calculate the offset, within the end chunk, of the last byte of the
* read. */
- u64 end_offset_in_chunk = (offset + size - 1) % WIM_CHUNK_SIZE;
+ const u32 end_offset_in_chunk = (offset + size - 1) & (orig_chunk_size - 1);
/* Calculate the number of chunk entries are actually needed to read the
* requested part of the resource. Include an entry for the first chunk
* account that if the last chunk required for the read is not the last
* chunk of the resource, an extra chunk entry is needed so that the
* compressed size of the last chunk of the read can be determined. */
- u64 num_alloc_chunk_entries = end_chunk - start_chunk + 1;
- if (end_chunk != num_chunks - 1)
- num_alloc_chunk_entries++;
+ const u64 num_alloc_chunk_entries = end_chunk - start_chunk +
+ 1 + (end_chunk != num_chunks - 1);
/* Set the size of each chunk table entry based on the resource's
* uncompressed size. */
- u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
+ const u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
/* Calculate the size, in bytes, of the full chunk table. */
- u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
+ const u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
/* Allocate a buffer to hold a subset of the chunk table. It will only
* contain offsets for the chunks that are actually needed for this
* read. For speed, allocate the buffer on the stack unless it's too
* large. */
- u64 *chunk_offsets;
- bool chunk_offsets_malloced;
- if (num_alloc_chunk_entries < 1024) {
+ if (num_alloc_chunk_entries <= stack_max) {
chunk_offsets = alloca(num_alloc_chunk_entries * sizeof(u64));
chunk_offsets_malloced = false;
} else {
- chunk_offsets = malloc(num_alloc_chunk_entries * sizeof(u64));
+ chunk_offsets = MALLOC(num_alloc_chunk_entries * sizeof(u64));
if (!chunk_offsets) {
ERROR("Failed to allocate chunk table "
"with %"PRIu64" entries", num_alloc_chunk_entries);
chunk_offsets[0] = 0;
/* Calculate the index of the first needed entry in the chunk table. */
- u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
+ const u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
/* Calculate the number of entries that need to be read from the chunk
* table. */
- u64 num_needed_chunk_entries = (start_chunk == 0) ?
+ const u64 num_needed_chunk_entries = (start_chunk == 0) ?
num_alloc_chunk_entries - 1 : num_alloc_chunk_entries;
/* Calculate the number of bytes of data that need to be read from the
* chunk table. */
- size_t chunk_table_needed_size =
+ const size_t chunk_table_needed_size =
num_needed_chunk_entries * chunk_entry_size;
if ((u64)chunk_table_needed_size !=
num_needed_chunk_entries * chunk_entry_size)
{
ERROR("Compressed read request too large to fit into memory!");
ret = WIMLIB_ERR_NOMEM;
- goto out_free_chunk_offsets;
+ goto out_free_memory;
}
/* Calculate the byte offset, in the WIM file, of the first chunk table
* entry to read. Take into account that if the WIM file is in the
* special "pipable" format, then the chunk table is at the end of the
* resource, not the beginning. */
- u64 file_offset_of_needed_chunk_entries =
- lte->resource_entry.offset + (start_table_idx *
- chunk_entry_size);
- if (lte->is_pipable)
- file_offset_of_needed_chunk_entries += lte->resource_entry.size -
- chunk_table_size;
+ const u64 file_offset_of_needed_chunk_entries =
+ lte->resource_entry.offset
+ + (start_table_idx * chunk_entry_size)
+ + (lte->is_pipable ? (lte->resource_entry.size - chunk_table_size) : 0);
/* Read the needed chunk table entries into the end of the chunk_offsets
* buffer. */
- void *chunk_tab_data = (u8*)&chunk_offsets[num_alloc_chunk_entries] -
- chunk_table_needed_size;
+ void * const chunk_tab_data = (u8*)&chunk_offsets[num_alloc_chunk_entries] -
+ chunk_table_needed_size;
ret = full_pread(in_fd, chunk_tab_data, chunk_table_needed_size,
file_offset_of_needed_chunk_entries);
if (ret)
{
typedef le64 __attribute__((may_alias)) aliased_le64_t;
typedef le32 __attribute__((may_alias)) aliased_le32_t;
- u64 *chunk_offsets_p = chunk_offsets;
+ u64 * const chunk_offsets_p = chunk_offsets + (start_chunk == 0);
u64 i;
- if (start_chunk == 0)
- chunk_offsets_p++;
-
if (chunk_entry_size == 4) {
aliased_le32_t *raw_entries = (aliased_le32_t*)chunk_tab_data;
for (i = 0; i < num_needed_chunk_entries; i++)
}
}
- /* Calculate file offset of the first chunk that needs to be read. N.B.
- * if the resource is pipable, the entries in the chunk table do *not*
- * include the chunk headers. */
+ /* Calculate file offset of the first chunk that needs to be read.
+ * Note: if the resource is pipable, the entries in the chunk table do
+ * *not* include the chunk headers. */
u64 cur_read_offset = lte->resource_entry.offset + chunk_offsets[0];
if (!lte->is_pipable)
cur_read_offset += chunk_table_size;
else
- cur_read_offset += start_chunk *
- sizeof(struct pwm_chunk_hdr);
+ cur_read_offset += start_chunk * sizeof(struct pwm_chunk_hdr);
/* If using a callback function, allocate a temporary buffer that will
* be used to pass data to it. If writing directly to a buffer instead,
* arrange to write data directly into it. */
- u8 *out_p;
- if (cb)
- out_p = alloca(WIM_CHUNK_SIZE);
- else
- out_p = ctx_or_buf;
+ size_t out_buf_size;
+ u8 *out_buf_end, *out_p;
+ if (cb) {
+ out_buf_size = max(in_chunk_size, orig_chunk_size);
+ if (out_buf_size <= stack_max) {
+ out_buf = alloca(out_buf_size);
+ } else {
+ out_buf = MALLOC(out_buf_size);
+ if (out_buf == NULL) {
+ ret = WIMLIB_ERR_NOMEM;
+ goto out_free_memory;
+ }
+ out_buf_malloced = true;
+ }
+ } else {
+ out_buf_size = size;
+ out_buf = ctx_or_buf;
+ }
+ out_buf_end = out_buf + out_buf_size;
+ out_p = out_buf;
/* Unless the raw compressed data was requested, allocate a temporary
* buffer for reading compressed chunks, each of which can be at most
- * WIM_CHUNK_SIZE - 1 bytes. This excludes compressed chunks that are a
- * full WIM_CHUNK_SIZE bytes, which are handled separately. */
- void *compressed_buf;
- if (!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS))
- compressed_buf = alloca(WIM_CHUNK_SIZE - 1);
+ * orig_chunk_size - 1 bytes. This excludes compressed chunks that are
+ * a full orig_chunk_size bytes, which are actually stored uncompressed.
+ */
+ if (!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS)) {
+ if (orig_chunk_size - 1 <= stack_max) {
+ compressed_buf = alloca(orig_chunk_size - 1);
+ } else {
+ compressed_buf = MALLOC(orig_chunk_size - 1);
+ if (compressed_buf == NULL) {
+ ret = WIMLIB_ERR_NOMEM;
+ goto out_free_memory;
+ }
+ compressed_buf_malloced = true;
+ }
+ }
+
+ /* Allocate yet another temporary buffer, this one for reading partial
+ * chunks. */
+ if (start_offset_in_chunk != 0 ||
+ (end_offset_in_chunk != orig_chunk_size - 1 &&
+ offset + size != wim_resource_size(lte)))
+ {
+ if (orig_chunk_size <= stack_max) {
+ tmp_buf = alloca(orig_chunk_size);
+ } else {
+ tmp_buf = MALLOC(orig_chunk_size);
+ if (tmp_buf == NULL) {
+ ret = WIMLIB_ERR_NOMEM;
+ goto out_free_memory;
+ }
+ tmp_buf_malloced = true;
+ }
+ }
/* Read, and possibly decompress, each needed chunk, either writing the
* data directly into the @ctx_or_buf buffer or passing it to the @cb
/* Calculate the sizes of the compressed chunk and of the
* uncompressed chunk. */
- unsigned compressed_chunk_size;
- unsigned uncompressed_chunk_size;
+ u32 compressed_chunk_size;
+ u32 uncompressed_chunk_size;
if (i != num_chunks - 1) {
/* Not the last chunk. Compressed size is given by
* difference of chunk table entries; uncompressed size
- * is always 32768 bytes. */
+ * is always the WIM chunk size. */
compressed_chunk_size = chunk_offsets[i + 1 - start_chunk] -
chunk_offsets[i - start_chunk];
- uncompressed_chunk_size = WIM_CHUNK_SIZE;
+ uncompressed_chunk_size = orig_chunk_size;
} else {
/* Last chunk. Compressed size is the remaining size in
* the compressed resource; uncompressed size is the
compressed_chunk_size -= num_chunks *
sizeof(struct pwm_chunk_hdr);
- if (wim_resource_size(lte) % WIM_CHUNK_SIZE == 0)
- uncompressed_chunk_size = WIM_CHUNK_SIZE;
+ if ((wim_resource_size(lte) & (orig_chunk_size - 1)) == 0)
+ uncompressed_chunk_size = orig_chunk_size;
else
- uncompressed_chunk_size = wim_resource_size(lte) %
- WIM_CHUNK_SIZE;
+ uncompressed_chunk_size = wim_resource_size(lte) &
+ (orig_chunk_size - 1);
}
/* Calculate how much of this chunk needs to be read. */
- unsigned partial_chunk_size;
- u64 start_offset = 0;
- u64 end_offset = WIM_CHUNK_SIZE - 1;
+ u32 partial_chunk_size;
+ u32 start_offset = 0;
+ u32 end_offset = orig_chunk_size - 1;
if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
partial_chunk_size = compressed_chunk_size;
{
/* Chunk stored uncompressed, or reading raw chunk data. */
ret = full_pread(in_fd,
- cb ? out_p + start_offset : out_p,
+ out_p,
partial_chunk_size,
cur_read_offset + start_offset);
if (ret)
goto read_error;
} else {
/* Compressed chunk and not doing raw read. */
+ u8 *target;
/* Read the compressed data into compressed_buf. */
ret = full_pread(in_fd,
if (ret)
goto read_error;
- /* For partial chunks and when writing directly to a
- * buffer, we must buffer the uncompressed data because
- * we don't need all of it. */
- if (partial_chunk_size != uncompressed_chunk_size &&
- cb == NULL)
- {
- u8 uncompressed_buf[uncompressed_chunk_size];
-
- ret = (*decompress)(compressed_buf,
- compressed_chunk_size,
- uncompressed_buf,
- uncompressed_chunk_size);
- if (ret) {
- ERROR("Failed to decompress data.");
- ret = WIMLIB_ERR_DECOMPRESSION;
- errno = EINVAL;
- goto out_free_chunk_offsets;
- }
- memcpy(out_p, uncompressed_buf + start_offset,
- partial_chunk_size);
- } else {
- ret = (*decompress)(compressed_buf,
- compressed_chunk_size,
- out_p,
- uncompressed_chunk_size);
- if (ret) {
- ERROR("Failed to decompress data.");
- ret = WIMLIB_ERR_DECOMPRESSION;
- errno = EINVAL;
- goto out_free_chunk_offsets;
- }
+ /* For partial chunks we must buffer the uncompressed
+ * data because we don't need all of it. */
+ if (partial_chunk_size == uncompressed_chunk_size)
+ target = out_p;
+ else
+ target = tmp_buf;
+
+ /* Decompress the chunk. */
+ ret = (*decompress)(compressed_buf,
+ compressed_chunk_size,
+ target,
+ uncompressed_chunk_size);
+ if (ret) {
+ ERROR("Failed to decompress data.");
+ ret = WIMLIB_ERR_DECOMPRESSION;
+ errno = EINVAL;
+ goto out_free_memory;
}
+ if (partial_chunk_size != uncompressed_chunk_size)
+ memcpy(out_p, tmp_buf + start_offset,
+ partial_chunk_size);
}
+
+ out_p += partial_chunk_size;
+
if (cb) {
/* Feed the data to the callback function. */
- ret = cb(out_p + start_offset,
- partial_chunk_size, ctx_or_buf);
- if (ret)
- goto out_free_chunk_offsets;
- } else {
- /* No callback function provided; we are writing
- * directly to a buffer. Advance the pointer into this
- * buffer by the number of uncompressed bytes that were
- * written. */
- out_p += partial_chunk_size;
+ wimlib_assert(offset == 0);
+
+ if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
+ ret = cb(out_buf, out_p - out_buf, ctx_or_buf);
+ if (ret)
+ goto out_free_memory;
+ out_p = out_buf;
+
+ } else if (i == end_chunk || out_p == out_buf_end) {
+ size_t bytes_sent;
+ const u8 *p;
+
+ for (p = out_buf; p != out_p; p += bytes_sent) {
+ bytes_sent = min(in_chunk_size, out_p - p);
+ ret = cb(p, bytes_sent, ctx_or_buf);
+ if (ret)
+ goto out_free_memory;
+ }
+ out_p = out_buf;
+ }
}
cur_read_offset += compressed_chunk_size;
}
ret = 0;
-out_free_chunk_offsets:
+out_free_memory:
if (chunk_offsets_malloced)
FREE(chunk_offsets);
+ if (out_buf_malloced)
+ FREE(out_buf);
+ if (compressed_buf_malloced)
+ FREE(compressed_buf);
+ if (tmp_buf_malloced)
+ FREE(tmp_buf);
return ret;
read_error:
ERROR_WITH_ERRNO("Error reading compressed file resource");
- goto out_free_chunk_offsets;
+ goto out_free_memory;
}
/* Skip over the chunk table at the end of pipable, compressed resource being
static int
read_pipable_resource(const struct wim_lookup_table_entry *lte,
u64 size, consume_data_callback_t cb,
- void *ctx_or_buf, int flags, u64 offset)
+ u32 in_chunk_size, void *ctx_or_buf,
+ int flags, u64 offset)
{
struct filedes *in_fd;
decompress_func_t decompress;
int ret;
- u8 chunk[WIM_CHUNK_SIZE];
- u8 cchunk[WIM_CHUNK_SIZE - 1];
+ const u32 orig_chunk_size = wim_resource_chunk_size(lte);
+ u8 cchunk[orig_chunk_size - 1];
+
+ size_t out_buf_size;
+ u8 *out_buf, *out_buf_end, *out_p;
+ if (cb) {
+ out_buf_size = max(in_chunk_size, orig_chunk_size);
+ out_buf = alloca(out_buf_size);
+ } else {
+ out_buf_size = size;
+ out_buf = ctx_or_buf;
+ }
+ out_buf_end = out_buf + out_buf_size;
+ out_p = out_buf;
/* Get pointers to appropriate decompression function and the input file
* descriptor. */
wimlib_assert(size == wim_resource_size(lte));
wimlib_assert(in_fd->offset == lte->resource_entry.offset);
- for (offset = 0; offset < size; offset += WIM_CHUNK_SIZE) {
+ u32 chunk_usize;
+ for (offset = 0; offset < size; offset += chunk_usize) {
struct pwm_chunk_hdr chunk_hdr;
- u32 chunk_size;
- u32 cchunk_size;
- u8 *res_chunk;
- u32 res_chunk_size;
+ u32 chunk_csize;
/* Calculate uncompressed size of next chunk. */
- chunk_size = min(WIM_CHUNK_SIZE, size - offset);
+ chunk_usize = min(orig_chunk_size, size - offset);
/* Read the compressed size of the next chunk from the chunk
* header. */
if (ret)
goto read_error;
- cchunk_size = le32_to_cpu(chunk_hdr.compressed_size);
+ chunk_csize = le32_to_cpu(chunk_hdr.compressed_size);
- if (cchunk_size > WIM_CHUNK_SIZE) {
+ if (chunk_csize > orig_chunk_size) {
errno = EINVAL;
ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
goto invalid;
}
/* Read chunk data. */
- ret = full_read(in_fd, cchunk, cchunk_size);
+ ret = full_read(in_fd, cchunk, chunk_csize);
if (ret)
goto read_error;
/* Decompress chunk if needed. Uncompressed size same
* as compressed size means the chunk is uncompressed.
*/
- res_chunk_size = chunk_size;
- if (cchunk_size == chunk_size) {
- res_chunk = cchunk;
+ if (chunk_csize == chunk_usize) {
+ memcpy(out_p, cchunk, chunk_usize);
} else {
- ret = (*decompress)(cchunk, cchunk_size,
- chunk, chunk_size);
+ ret = (*decompress)(cchunk, chunk_csize,
+ out_p, chunk_usize);
if (ret) {
errno = EINVAL;
ret = WIMLIB_ERR_DECOMPRESSION;
goto invalid;
}
- res_chunk = chunk;
}
+ out_p += chunk_usize;
/* Feed the uncompressed data into the callback function or copy
* it into the provided buffer. */
- if (cb) {
- ret = cb(res_chunk, res_chunk_size, ctx_or_buf);
- if (ret)
- return ret;
- } else {
- ctx_or_buf = mempcpy(ctx_or_buf, res_chunk,
- res_chunk_size);
+ if (cb && (out_p == out_buf_end ||
+ offset + chunk_usize == size))
+ {
+ size_t bytes_sent;
+ const u8 *p;
+
+ for (p = out_buf; p != out_p; p += bytes_sent) {
+ bytes_sent = min(in_chunk_size, out_p - p);
+ ret = cb(p, bytes_sent, ctx_or_buf);
+ if (ret)
+ return ret;
+ }
+ out_p = out_buf;
}
}
int
read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
u64 size, consume_data_callback_t cb,
+ u32 in_chunk_size,
void *ctx_or_buf, int flags, u64 offset)
{
struct filedes *in_fd;
* somewhere else. */
wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
+ /* If a callback was specified, in_chunk_size must be a power of 2 (and
+ * not 0). */
+ wimlib_assert(cb == NULL || is_power_of_2(in_chunk_size));
+
+ /* If a callback was specified, offset must be zero. */
+ wimlib_assert(cb == NULL || offset == 0);
+
/* Retrieve input file descriptor for the WIM file. */
in_fd = <e->wim->in_fd;
}
} else if (cb) {
/* Send data to callback function */
- u8 buf[min(WIM_CHUNK_SIZE, size)];
+ u8 buf[min(in_chunk_size, size)];
while (size) {
- size_t bytes_to_read = min(WIM_CHUNK_SIZE,
- size);
+ size_t bytes_to_read = min(in_chunk_size, size);
ret = full_pread(in_fd, buf, bytes_to_read,
offset);
if (ret)
} else if (lte->is_pipable && !filedes_is_seekable(in_fd)) {
/* Reading compressed, pipable resource from pipe. */
ret = read_pipable_resource(lte, size, cb,
+ in_chunk_size,
ctx_or_buf, flags, offset);
} else {
/* Reading compressed, possibly pipable resource from seekable
* file. */
ret = read_compressed_resource(lte, size, cb,
+ in_chunk_size,
ctx_or_buf, flags, offset);
}
goto out;
read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
size_t size, u64 offset, void *buf)
{
- return read_partial_wim_resource(lte, size, NULL, buf, 0, offset);
+ return read_partial_wim_resource(lte, size, NULL, 0, buf, 0, offset);
}
static int
read_wim_resource_prefix(const struct wim_lookup_table_entry *lte,
u64 size,
consume_data_callback_t cb,
+ u32 in_chunk_size,
void *ctx_or_buf,
int flags)
{
- return read_partial_wim_resource(lte, size, cb, ctx_or_buf, flags, 0);
+ return read_partial_wim_resource(lte, size, cb, in_chunk_size,
+ ctx_or_buf, flags, 0);
}
read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
u64 size,
consume_data_callback_t cb,
+ u32 in_chunk_size,
void *ctx_or_buf,
int _ignored_flags)
{
int ret;
struct filedes fd;
int raw_fd;
+ u8 *out_buf;
+ bool out_buf_malloced;
+ const size_t stack_max = 32768;
DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"",
size, lte->file_on_disk);
return WIMLIB_ERR_OPEN;
}
filedes_init(&fd, raw_fd);
+ out_buf_malloced = false;
if (cb) {
/* Send data to callback function */
- u8 buf[min(WIM_CHUNK_SIZE, size)];
+ if (in_chunk_size <= stack_max) {
+ out_buf = alloca(in_chunk_size);
+ } else {
+ out_buf = MALLOC(in_chunk_size);
+ if (out_buf == NULL) {
+ ret = WIMLIB_ERR_NOMEM;
+ goto out_close;
+ }
+ out_buf_malloced = true;
+ }
+
size_t bytes_to_read;
while (size) {
- bytes_to_read = min(WIM_CHUNK_SIZE, size);
- ret = full_read(&fd, buf, bytes_to_read);
+ bytes_to_read = min(in_chunk_size, size);
+ ret = full_read(&fd, out_buf, bytes_to_read);
if (ret)
goto read_error;
- ret = cb(buf, bytes_to_read, ctx_or_buf);
+ ret = cb(out_buf, bytes_to_read, ctx_or_buf);
if (ret)
goto out_close;
size -= bytes_to_read;
ERROR_WITH_ERRNO("Error reading \"%"TS"\"", filename);
out_close:
filedes_close(&fd);
+ if (out_buf_malloced)
+ FREE(out_buf);
return ret;
}
#endif /* !__WIN32__ */
static int
read_buffer_prefix(const struct wim_lookup_table_entry *lte,
u64 size, consume_data_callback_t cb,
+ u32 in_chunk_size,
void *ctx_or_buf, int _ignored_flags)
{
- const void *inbuf = lte->attached_buffer;
- int ret;
if (cb) {
- while (size) {
- size_t chunk_size = min(WIM_CHUNK_SIZE, size);
- ret = cb(inbuf, chunk_size, ctx_or_buf);
+ int ret;
+ u32 chunk_size;
+
+ for (u64 offset = 0; offset < size; offset += chunk_size) {
+ chunk_size = min(in_chunk_size, size - offset);
+ ret = cb((const u8*)lte->attached_buffer + offset,
+ chunk_size, ctx_or_buf);
if (ret)
return ret;
- size -= chunk_size;
- inbuf += chunk_size;
}
} else {
- memcpy(ctx_or_buf, inbuf, size);
+ memcpy(ctx_or_buf, lte->attached_buffer, size);
}
return 0;
}
typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
u64 size,
consume_data_callback_t cb,
+ u32 in_chunk_size,
void *ctx_or_buf,
int flags);
*/
int
read_resource_prefix(const struct wim_lookup_table_entry *lte,
- u64 size, consume_data_callback_t cb, void *ctx_or_buf,
- int flags)
+ u64 size, consume_data_callback_t cb, u32 in_chunk_size,
+ void *ctx_or_buf, int flags)
{
static const read_resource_prefix_handler_t handlers[] = {
[RESOURCE_IN_WIM] = read_wim_resource_prefix,
};
wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
&& handlers[lte->resource_location] != NULL);
- return handlers[lte->resource_location](lte, size, cb, ctx_or_buf, flags);
+ wimlib_assert(cb == NULL || in_chunk_size > 0);
+ return handlers[lte->resource_location](lte, size, cb, in_chunk_size, ctx_or_buf, flags);
}
int
read_full_resource_into_buf(const struct wim_lookup_table_entry *lte,
void *buf)
{
- return read_resource_prefix(lte, wim_resource_size(lte), NULL, buf, 0);
+ return read_resource_prefix(lte, wim_resource_size(lte), NULL, 0, buf, 0);
}
int
sha1_init(&ctx.sha_ctx);
ret = read_resource_prefix(lte, size,
extract_chunk_sha1_wrapper,
+ wim_resource_chunk_size(lte),
&ctx, 0);
if (ret == 0) {
u8 hash[SHA1_HASH_SIZE];
} else {
/* Don't do SHA1 */
ret = read_resource_prefix(lte, size, extract_chunk,
+ wim_resource_chunk_size(lte),
extract_chunk_arg, 0);
}
return ret;
sha1_init(&sha_ctx);
ret = read_resource_prefix(lte, wim_resource_size(lte),
- sha1_chunk, &sha_ctx, 0);
+ sha1_chunk, wim_resource_chunk_size(lte),
+ &sha_ctx, 0);
if (ret == 0)
sha1_final(lte->hash, &sha_ctx);
return ret;
[WIMLIB_ERR_INVALID_CAPTURE_CONFIG]
= T("The capture configuration string was invalid"),
[WIMLIB_ERR_INVALID_CHUNK_SIZE]
- = T("The WIM is compressed but does not have a chunk "
- "size of 32768"),
+ = T("The WIM chunk size was invalid"),
[WIMLIB_ERR_INVALID_COMPRESSION_TYPE]
= T("The WIM is compressed, but is not marked as having LZX or "
"XPRESS compression"),
return wim;
}
+static bool
+wim_chunk_size_valid(u32 chunk_size, int ctype)
+{
+ u32 order;
+
+ /* Chunk size is meaningless for uncompressed WIMs --- any value is
+ * okay. */
+ if (ctype == WIMLIB_COMPRESSION_TYPE_NONE)
+ return true;
+
+ /* Chunk size must be power of 2. */
+ if (chunk_size == 0)
+ return false;
+ order = bsr32(chunk_size);
+ if (chunk_size != 1U << order)
+ return false;
+
+ /* Order Size
+ * ===== ====
+ * 15 32768
+ * 16 65536
+ * 17 131072
+ * 18 262144
+ * 19 524288
+ * 20 1048576
+ * 21 2097152
+ * 22 4194304
+ * 23 8388608
+ * 24 16777216
+ * 25 33554432
+ * 26 67108864
+ */
+ switch (ctype) {
+ case WIMLIB_COMPRESSION_TYPE_LZX:
+ /* TODO: Allow other chunk sizes when supported by the LZX
+ * compressor and decompressor. */
+ return order == 15;
+
+ case WIMLIB_COMPRESSION_TYPE_XPRESS:
+ /* WIMGAPI (Windows 7) didn't seem to support XPRESS chunk size
+ * below 32768 bytes, but larger power-of-two sizes appear to be
+ * supported. 67108864 was the largest size that worked.
+ * (Note, however, that the offsets of XPRESS matches are still
+ * limited to 65535 bytes even when a much larger chunk size is
+ * used!) */
+ return order >= 15 && order <= 26;
+ }
+ return false;
+}
+
+static u32
+wim_default_chunk_size(int ctype)
+{
+ return 32768;
+}
+
/*
* Calls a function on images in the WIM. If @image is WIMLIB_ALL_IMAGES, @visitor
* is called on the WIM once for each image, with each image selected as the
if (!wim)
return WIMLIB_ERR_NOMEM;
- ret = init_wim_header(&wim->hdr, ctype);
+ ret = init_wim_header(&wim->hdr, ctype, wim_default_chunk_size(ctype));
if (ret != 0)
goto out_free;
wim->refcnts_ok = 1;
wim->compression_type = ctype;
wim->out_compression_type = ctype;
+ wim->chunk_size = wim->hdr.chunk_size;
+ wim->out_chunk_size = wim->hdr.chunk_size;
*wim_ret = wim;
return 0;
out_free:
info->image_count = wim->hdr.image_count;
info->boot_index = wim->hdr.boot_idx;
info->wim_version = WIM_VERSION;
- info->chunk_size = WIM_CHUNK_SIZE;
+ info->chunk_size = wim->hdr.chunk_size;
info->part_number = wim->hdr.part_number;
info->total_parts = wim->hdr.total_parts;
info->compression_type = wim->compression_type;
return 0;
}
+/* API function documented in wimlib.h */
+WIMLIBAPI int
+wimlib_set_output_compression_type(WIMStruct *wim, int ctype)
+{
+ switch (ctype) {
+ case WIMLIB_COMPRESSION_TYPE_INVALID:
+ break;
+ case WIMLIB_COMPRESSION_TYPE_NONE:
+ case WIMLIB_COMPRESSION_TYPE_LZX:
+ case WIMLIB_COMPRESSION_TYPE_XPRESS:
+ wim->out_compression_type = ctype;
+
+ /* Reset the chunk size if it's no longer valid. */
+ if (!wim_chunk_size_valid(wim->out_chunk_size,
+ wim->out_compression_type))
+ wim->out_chunk_size = wim_default_chunk_size(wim->out_compression_type);
+ return 0;
+ }
+ return WIMLIB_ERR_INVALID_PARAM;
+}
+
+/* API function documented in wimlib.h */
+WIMLIBAPI int
+wimlib_set_output_chunk_size(WIMStruct *wim, uint32_t chunk_size)
+{
+ if (!wim_chunk_size_valid(chunk_size, wim->out_compression_type)) {
+ ERROR("Invalid chunk size (%"PRIu32" bytes) "
+ "for compression type %"TS"!",
+ chunk_size,
+ wimlib_get_compression_type_string(wim->out_compression_type));
+ switch (wim->out_compression_type) {
+ case WIMLIB_COMPRESSION_TYPE_XPRESS:
+ ERROR("Valid chunk sizes for XPRESS are 32768, 65536, 131072, ..., 67108864.");
+ break;
+ case WIMLIB_COMPRESSION_TYPE_LZX:
+ ERROR("The only valid chunk size for LZX is 32768.");
+ break;
+ }
+ return WIMLIB_ERR_INVALID_CHUNK_SIZE;
+ }
+ wim->out_chunk_size = chunk_size;
+ return 0;
+}
+
static int
do_open_wim(const tchar *filename, struct filedes *fd_ret)
{
}
wim->out_compression_type = wim->compression_type;
+ /* Check and cache the chunk size. */
+ wim->chunk_size = wim->out_chunk_size = wim->hdr.chunk_size;
+ if (!wim_chunk_size_valid(wim->chunk_size, wim->compression_type)) {
+ ERROR("Invalid chunk size (%"PRIu32" bytes) "
+ "for compression type %"TS"!",
+ wim->chunk_size,
+ wimlib_get_compression_type_string(wim->compression_type));
+ return WIMLIB_ERR_INVALID_CHUNK_SIZE;
+ }
+
if (open_flags & WIMLIB_OPEN_FLAG_CHECK_INTEGRITY) {
ret = check_wim_integrity(wim, progress_func);
if (ret == WIM_INTEGRITY_NONEXISTENT) {
read_win32_file_prefix(const struct wim_lookup_table_entry *lte,
u64 size,
consume_data_callback_t cb,
+ u32 in_chunk_size,
void *ctx_or_buf,
int _ignored_flags)
{
int ret = 0;
void *out_buf;
+ bool out_buf_malloced;
u64 bytes_remaining;
+ const size_t stack_max = 32768;
HANDLE hFile = win32_open_existing_file(lte->file_on_disk,
FILE_READ_DATA);
return WIMLIB_ERR_OPEN;
}
- if (cb)
- out_buf = alloca(WIM_CHUNK_SIZE);
- else
+ out_buf_malloced = false;
+ if (cb) {
+ if (in_chunk_size <= stack_max) {
+ out_buf = alloca(in_chunk_size);
+ } else {
+ out_buf = MALLOC(in_chunk_size);
+ if (out_buf == NULL) {
+ ret = WIMLIB_ERR_NOMEM;
+ goto out_close_handle;
+ }
+ out_buf_malloced = true;
+ }
+ } else {
out_buf = ctx_or_buf;
+ }
bytes_remaining = size;
while (bytes_remaining) {
DWORD bytesToRead, bytesRead;
- bytesToRead = min(WIM_CHUNK_SIZE, bytes_remaining);
+ bytesToRead = min(in_chunk_size, bytes_remaining);
if (!ReadFile(hFile, out_buf, bytesToRead, &bytesRead, NULL) ||
bytesRead != bytesToRead)
{
out_buf += bytesRead;
}
}
+ if (out_buf_malloced)
+ FREE(out_buf);
+out_close_handle:
CloseHandle(hFile);
return ret;
}
void *buf;
size_t buf_filled;
u64 bytes_remaining;
+ u32 in_chunk_size;
};
static DWORD WINAPI
{
const void *data = _data;
struct win32_encrypted_read_ctx *ctx = _ctx;
+ u32 in_chunk_size = ctx->in_chunk_size;
int ret;
DEBUG("len = %lu", len);
len);
while (bytes_to_buffer) {
size_t bytes_to_copy_to_buf =
- min(bytes_to_buffer, WIM_CHUNK_SIZE - ctx->buf_filled);
+ min(bytes_to_buffer, in_chunk_size - ctx->buf_filled);
memcpy(ctx->buf + ctx->buf_filled, data,
bytes_to_copy_to_buf);
data += bytes_to_copy_to_buf;
bytes_to_buffer -= bytes_to_copy_to_buf;
- if (ctx->buf_filled == WIM_CHUNK_SIZE ||
+ if (ctx->buf_filled == in_chunk_size ||
ctx->buf_filled == ctx->bytes_remaining)
{
ret = (*ctx->read_prefix_cb)(ctx->buf,
read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte,
u64 size,
consume_data_callback_t cb,
+ u32 in_chunk_size,
void *ctx_or_buf,
int _ignored_flags)
{
export_ctx.read_prefix_ctx_or_buf = ctx_or_buf;
export_ctx.wimlib_err_code = 0;
if (cb) {
- export_ctx.buf = MALLOC(WIM_CHUNK_SIZE);
+ export_ctx.buf = MALLOC(in_chunk_size);
if (!export_ctx.buf)
return WIMLIB_ERR_NOMEM;
} else {
# include <sys/uio.h> /* for `struct iovec' */
#endif
+/* Return true if the specified resource is compressed and the compressed data
+ * can be reused with the specified output parameters. */
+static bool
+can_raw_copy(const struct wim_lookup_table_entry *lte,
+ int write_resource_flags, int out_ctype, u32 out_chunk_size)
+{
+ return (out_ctype == wim_resource_compression_type(lte)
+ && out_chunk_size == wim_resource_chunk_size(lte)
+ && out_ctype != WIMLIB_COMPRESSION_TYPE_NONE);
+}
+
+
+/* Return true if the specified resource must be recompressed when the specified
+ * output parameters are used. */
+static bool
+must_compress_stream(const struct wim_lookup_table_entry *lte,
+ int write_resource_flags, int out_ctype, u32 out_chunk_size)
+{
+ return (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE
+ && ((write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS)
+ || !can_raw_copy(lte, write_resource_flags,
+ out_ctype, out_chunk_size)));
+}
+
static unsigned
compress_chunk(const void * uncompressed_data,
unsigned uncompressed_len,
static int
begin_wim_resource_chunk_tab(const struct wim_lookup_table_entry *lte,
struct filedes *out_fd,
+ u32 out_chunk_size,
struct chunk_table **chunk_tab_ret,
int resource_flags)
{
int ret;
size = wim_resource_size(lte);
- num_chunks = wim_resource_chunks(lte);
+ num_chunks = DIV_ROUND_UP(size, out_chunk_size);
bytes_per_chunk_entry = (size > (1ULL << 32)) ? 8 : 4;
alloc_size = sizeof(struct chunk_table) + num_chunks * sizeof(u64);
chunk_tab = CALLOC(1, alloc_size);
struct write_resource_ctx {
int out_ctype;
+ u32 out_chunk_size;
struct wimlib_lzx_context *comp_ctx;
struct chunk_table *chunk_tab;
struct filedes *out_fd;
const void *out_chunk;
unsigned out_chunk_size;
int ret;
+ void *compressed_chunk = NULL;
+ unsigned compressed_size;
+ bool compressed_chunk_malloced = false;
+ size_t stack_max = 32768;
if (ctx->doing_sha)
sha1_update(&ctx->sha_ctx, chunk, chunk_size);
out_chunk = chunk;
out_chunk_size = chunk_size;
if (ctx->out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
- void *compressed_chunk;
- unsigned compressed_size;
/* Compress the chunk. */
- compressed_chunk = alloca(chunk_size);
+ if (chunk_size <= stack_max) {
+ compressed_chunk = alloca(chunk_size);
+ } else {
+ compressed_chunk = MALLOC(chunk_size);
+ if (compressed_chunk == NULL)
+ return WIMLIB_ERR_NOMEM;
+ compressed_chunk_malloced = true;
+ }
compressed_size = compress_chunk(chunk, chunk_size,
compressed_chunk,
ret = full_write(ctx->out_fd, out_chunk, out_chunk_size);
if (ret)
goto error;
- return 0;
+
+out_free_memory:
+ if (compressed_chunk_malloced)
+ FREE(compressed_chunk);
+ return ret;
error:
ERROR_WITH_ERRNO("Failed to write WIM resource chunk");
- return ret;
+ goto out_free_memory;
}
/*
int
write_wim_resource(struct wim_lookup_table_entry *lte,
struct filedes *out_fd, int out_ctype,
+ u32 out_chunk_size,
struct resource_entry *out_res_entry,
int resource_flags,
struct wimlib_lzx_context **comp_ctx)
{
struct write_resource_ctx write_ctx;
off_t res_start_offset;
+ u32 in_chunk_size;
u64 read_size;
int ret;
* desired other than no compression, we can simply copy the compressed
* data without recompressing it. This also means we must skip
* calculating the SHA1, as we never will see the uncompressed data. */
- if (lte->resource_location == RESOURCE_IN_WIM &&
- out_ctype == wim_resource_compression_type(lte) &&
- out_ctype != WIMLIB_COMPRESSION_TYPE_NONE &&
- !(resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS))
- {
+ if (can_raw_copy(lte, resource_flags, out_ctype, out_chunk_size)) {
/* Normally we can request a RAW_FULL read, but if we're reading
* from a pipable resource and writing a non-pipable resource or
* vice versa, then a RAW_CHUNKS read needs to be requested so
resource_flags |= WIMLIB_READ_RESOURCE_FLAG_RAW_FULL;
else
resource_flags |= WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS;
- write_ctx.doing_sha = false;
read_size = lte->resource_entry.size;
+ write_ctx.doing_sha = false;
} else {
write_ctx.doing_sha = true;
sha1_init(&write_ctx.sha_ctx);
read_size = lte->resource_entry.original_size;
}
-
/* If the output resource is to be compressed, initialize the chunk
* table and set the function to use for chunk compression. Exceptions:
* no compression function is needed if doing a raw copy; also, no chunk
* table is needed if doing a *full* (not per-chunk) raw copy. */
write_ctx.out_ctype = WIMLIB_COMPRESSION_TYPE_NONE;
+ write_ctx.out_chunk_size = out_chunk_size;
write_ctx.chunk_tab = NULL;
if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
+ wimlib_assert(out_chunk_size > 0);
if (!(resource_flags & WIMLIB_READ_RESOURCE_FLAG_RAW)) {
write_ctx.out_ctype = out_ctype;
if (out_ctype == WIMLIB_COMPRESSION_TYPE_LZX) {
}
if (!(resource_flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL)) {
ret = begin_wim_resource_chunk_tab(lte, out_fd,
+ out_chunk_size,
&write_ctx.chunk_tab,
resource_flags);
if (ret)
write_ctx.out_fd = out_fd;
write_ctx.resource_flags = resource_flags;
try_write_again:
+ if (write_ctx.out_ctype == WIMLIB_COMPRESSION_TYPE_NONE)
+ in_chunk_size = wim_resource_chunk_size(lte);
+ else
+ in_chunk_size = out_chunk_size;
ret = read_resource_prefix(lte, read_size,
- write_resource_cb, &write_ctx, resource_flags);
+ write_resource_cb,
+ in_chunk_size, &write_ctx, resource_flags);
if (ret)
goto out_free_chunk_tab;
write_wim_resource_from_buffer(const void *buf, size_t buf_size,
int reshdr_flags, struct filedes *out_fd,
int out_ctype,
+ u32 out_chunk_size,
struct resource_entry *out_res_entry,
u8 *hash_ret, int write_resource_flags,
struct wimlib_lzx_context **comp_ctx)
lte.attached_buffer = (void*)buf;
lte.resource_entry.original_size = buf_size;
lte.resource_entry.flags = reshdr_flags;
+ lte.compression_type = WIMLIB_COMPRESSION_TYPE_NONE;
if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) {
sha1_buffer(buf, buf_size, lte.hash);
lte.unhashed = 1;
}
- ret = write_wim_resource(<e, out_fd, out_ctype, out_res_entry,
- write_resource_flags, comp_ctx);
+ ret = write_wim_resource(<e, out_fd, out_ctype, out_chunk_size,
+ out_res_entry, write_resource_flags, comp_ctx);
if (ret)
return ret;
if (hash_ret)
struct message {
struct wim_lookup_table_entry *lte;
+ u32 out_chunk_size;
u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
struct serial_write_stream_ctx {
struct filedes *out_fd;
int out_ctype;
+ u32 out_chunk_size;
struct wimlib_lzx_context **comp_ctx;
int write_resource_flags;
};
{
struct serial_write_stream_ctx *ctx = _ctx;
return write_wim_resource(lte, ctx->out_fd,
- ctx->out_ctype, <e->output_resource_entry,
+ ctx->out_ctype,
+ ctx->out_chunk_size,
+ <e->output_resource_entry,
ctx->write_resource_flags,
ctx->comp_ctx);
}
struct wim_lookup_table *lookup_table,
struct filedes *out_fd,
int out_ctype,
+ u32 out_chunk_size,
struct wimlib_lzx_context **comp_ctx,
int write_resource_flags,
struct write_streams_progress_data *progress_data)
struct serial_write_stream_ctx ctx = {
.out_fd = out_fd,
.out_ctype = out_ctype,
+ .out_chunk_size = out_chunk_size,
.write_resource_flags = write_resource_flags,
.comp_ctx = comp_ctx,
};
struct wim_lookup_table *lookup_table,
struct filedes *out_fd,
int out_ctype,
+ u32 out_chunk_size,
struct wimlib_lzx_context **comp_ctx,
int write_resource_flags,
struct write_streams_progress_data *progress_data)
lookup_table,
out_fd,
out_ctype,
+ out_chunk_size,
comp_ctx,
write_resource_flags,
progress_data);
struct filedes *out_fd;
off_t res_start_offset;
int out_ctype;
+ u32 out_chunk_size;
struct wimlib_lzx_context **comp_ctx;
int write_resource_flags;
struct shared_queue *res_to_compress_queue;
};
static int
-init_message(struct message *msg)
+init_message(struct message *msg, u32 out_chunk_size)
{
+ msg->out_chunk_size = out_chunk_size;
for (size_t i = 0; i < MAX_CHUNKS_PER_MSG; i++) {
- msg->compressed_chunks[i] = MALLOC(WIM_CHUNK_SIZE);
- msg->uncompressed_chunks[i] = MALLOC(WIM_CHUNK_SIZE);
+ msg->compressed_chunks[i] = MALLOC(out_chunk_size);
+ msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
if (msg->compressed_chunks[i] == NULL ||
msg->uncompressed_chunks[i] == NULL)
return WIMLIB_ERR_NOMEM;
}
static struct message *
-allocate_messages(size_t num_messages)
+allocate_messages(size_t num_messages, u32 out_chunk_size)
{
struct message *msgs;
if (!msgs)
return NULL;
for (size_t i = 0; i < num_messages; i++) {
- if (init_message(&msgs[i])) {
+ if (init_message(&msgs[i], out_chunk_size)) {
free_messages(msgs, num_messages);
return NULL;
}
{
/* Pre-allocate all the buffers that will be needed to do the chunk
* compression. */
- ctx->msgs = allocate_messages(ctx->num_messages);
+ ctx->msgs = allocate_messages(ctx->num_messages, ctx->out_chunk_size);
if (!ctx->msgs)
return WIMLIB_ERR_NOMEM;
* it if needed. */
ret = begin_wim_resource_chunk_tab(cur_lte,
ctx->out_fd,
+ ctx->out_chunk_size,
&ctx->cur_chunk_tab,
ctx->write_resource_flags);
if (ret)
ret = write_wim_resource(cur_lte,
ctx->out_fd,
WIMLIB_COMPRESSION_TYPE_NONE,
+ 0,
&cur_lte->output_resource_entry,
ctx->write_resource_flags,
ctx->comp_ctx);
ctx->lookup_table,
ctx->out_fd,
ctx->out_ctype,
+ ctx->out_chunk_size,
ctx->comp_ctx,
ctx->write_resource_flags,
ctx->progress_data);
ctx->lookup_table,
ctx->out_fd,
ctx->out_ctype,
+ ctx->out_chunk_size,
ctx->comp_ctx,
ctx->write_resource_flags,
ctx->progress_data);
* when @lte is already hashed. */
sha1_init(&ctx->next_sha_ctx);
ctx->next_chunk = 0;
- ctx->next_num_chunks = wim_resource_chunks(lte);
+ ctx->next_num_chunks = DIV_ROUND_UP(wim_resource_size(lte),
+ ctx->out_chunk_size);
ctx->next_lte = lte;
INIT_LIST_HEAD(<e->msg_list);
list_add_tail(<e->being_compressed_list, &ctx->outstanding_streams);
ret = read_resource_prefix(lte, wim_resource_size(lte),
- main_writer_thread_cb, ctx, 0);
+ main_writer_thread_cb,
+ ctx->out_chunk_size, ctx, 0);
if (ret)
return ret;
wimlib_assert(ctx->next_chunk == ctx->next_num_chunks);
int ret;
if (wim_resource_size(lte) < 1000 ||
- ctx->out_ctype == WIMLIB_COMPRESSION_TYPE_NONE ||
- (lte->resource_location == RESOURCE_IN_WIM &&
- !(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS) &&
- lte->wim->compression_type == ctx->out_ctype))
+ !must_compress_stream(lte, ctx->write_resource_flags,
+ ctx->out_ctype, ctx->out_chunk_size))
{
/* Stream is too small or isn't being compressed. Process it by
* the main thread when we have a chance. We can't necessarily
* create the number of threads requested.
*
* High level description of the algorithm for writing compressed streams in
- * parallel: We perform compression on chunks of size WIM_CHUNK_SIZE bytes
- * rather than on full files. The currently executing thread becomes the main
- * thread and is entirely in charge of reading the data to compress (which may
- * be in any location understood by the resource code--- such as in an external
- * file being captured, or in another WIM file from which an image is being
- * exported) and actually writing the compressed data to the output file.
- * Additional threads are "compressor threads" and all execute the
- * compressor_thread_proc, where they repeatedly retrieve buffers of data from
- * the main thread, compress them, and hand them back to the main thread.
+ * parallel: We perform compression on chunks rather than on full files. The
+ * currently executing thread becomes the main thread and is entirely in charge
+ * of reading the data to compress (which may be in any location understood by
+ * the resource code--- such as in an external file being captured, or in
+ * another WIM file from which an image is being exported) and actually writing
+ * the compressed data to the output file. Additional threads are "compressor
+ * threads" and all execute the compressor_thread_proc, where they repeatedly
+ * retrieve buffers of data from the main thread, compress them, and hand them
+ * back to the main thread.
*
* Certain streams, such as streams that do not need to be compressed (e.g.
* input compression type same as output compression type) or streams of very
struct wim_lookup_table *lookup_table,
struct filedes *out_fd,
int out_ctype,
+ u32 out_chunk_size,
struct wimlib_lzx_context **comp_ctx,
int write_resource_flags,
struct write_streams_progress_data *progress_data,
ctx.lookup_table = lookup_table;
ctx.out_fd = out_fd;
ctx.out_ctype = out_ctype;
+ ctx.out_chunk_size = out_chunk_size;
ctx.comp_ctx = comp_ctx;
ctx.res_to_compress_queue = &res_to_compress_queue;
ctx.compressed_res_queue = &compressed_res_queue;
lookup_table,
out_fd,
out_ctype,
+ out_chunk_size,
comp_ctx,
write_resource_flags,
progress_data);
write_stream_list(struct list_head *stream_list,
struct wim_lookup_table *lookup_table,
struct filedes *out_fd, int out_ctype,
+ u32 out_chunk_size,
struct wimlib_lzx_context **comp_ctx,
int write_flags,
unsigned num_threads, wimlib_progress_func_t progress_func)
list_for_each_entry(lte, stream_list, write_streams_list) {
num_streams++;
total_bytes += wim_resource_size(lte);
- if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE
- && (wim_resource_compression_type(lte) != out_ctype ||
- (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS)))
- {
+ if (must_compress_stream(lte, write_resource_flags,
+ out_ctype, out_chunk_size))
total_compression_bytes += wim_resource_size(lte);
- }
if (lte->resource_location == RESOURCE_IN_WIM) {
if (prev_wim_part != lte->wim) {
prev_wim_part = lte->wim;
lookup_table,
out_fd,
out_ctype,
+ out_chunk_size,
comp_ctx,
write_resource_flags,
&progress_data,
lookup_table,
out_fd,
out_ctype,
+ out_chunk_size,
comp_ctx,
write_resource_flags,
&progress_data);
wim->lookup_table,
&wim->out_fd,
wim->out_compression_type,
+ wim->out_chunk_size,
&wim->lzx_context,
write_flags,
num_threads,
ret = write_wim_resource(imd->metadata_lte,
&wim->out_fd,
wim->out_compression_type,
+ wim->out_chunk_size,
&imd->metadata_lte->output_resource_entry,
write_resource_flags,
&wim->lzx_context);
* finish_write(). */
}
-/* API function documented in wimlib.h */
-WIMLIBAPI int
-wimlib_set_output_compression_type(WIMStruct *wim, int ctype)
-{
- switch (ctype) {
- case WIMLIB_COMPRESSION_TYPE_INVALID:
- break;
- case WIMLIB_COMPRESSION_TYPE_NONE:
- case WIMLIB_COMPRESSION_TYPE_LZX:
- case WIMLIB_COMPRESSION_TYPE_XPRESS:
- wim->out_compression_type = ctype;
- return 0;
- }
- return WIMLIB_ERR_INVALID_PARAM;
-}
-
/* Write a standalone WIM or split WIM (SWM) part to a new file or to a file
* descriptor. */
int
if (wim->compression_type != wim->out_compression_type)
wim->hdr.flags = get_wim_hdr_cflags(wim->out_compression_type);
+ /* Set chunk size if different. */
+ wim->hdr.chunk_size = wim->out_chunk_size;
+
/* Use GUID if specified; otherwise generate a new one. */
if (guid)
memcpy(wim->hdr.guid, guid, WIMLIB_GUID_LEN);
wim->lookup_table,
&wim->out_fd,
wim->compression_type,
+ wim->chunk_size,
&wim->lzx_context,
write_flags,
num_threads,
&& !(write_flags & (WIMLIB_WRITE_FLAG_REBUILD |
WIMLIB_WRITE_FLAG_PIPABLE))
&& !(wim_is_pipable(wim))
- && wim->compression_type == wim->out_compression_type)
+ && wim->compression_type == wim->out_compression_type
+ && wim->chunk_size == wim->out_chunk_size)
{
ret = overwrite_wim_inplace(wim, write_flags, num_threads,
progress_func);
WIM_RESHDR_FLAG_METADATA,
&wim->out_fd,
WIMLIB_COMPRESSION_TYPE_NONE,
+ 0,
out_res_entry,
NULL,
write_resource_flags,
#include "wimlib/util.h"
#include "wimlib/xpress.h"
+#ifdef HAVE_ALLOCA_H
+# include <alloca.h>
+#endif
+
#include <string.h>
/* Intermediate XPRESS match/literal representation. */
static const struct lz_params xpress_lz_params = {
.min_match = XPRESS_MIN_MATCH_LEN,
.max_match = XPRESS_MAX_MATCH_LEN,
+ .max_offset = XPRESS_MAX_OFFSET,
.good_match = 16,
.nice_match = 32,
.max_chain_len = 16,
struct output_bitstream ostream;
struct xpress_record_ctx record_ctx;
- struct xpress_match matches[uncompressed_len];
- u8 udata[uncompressed_len + 8];
+
+ struct xpress_match *matches;
+ input_idx_t *prev_tab;
+ u8 *udata;
+
u16 codewords[XPRESS_NUM_SYMBOLS];
u8 lens[XPRESS_NUM_SYMBOLS];
input_idx_t num_matches;
input_idx_t compressed_len;
input_idx_t i;
+ const size_t stack_max = 65536;
/* XPRESS requires 256 bytes of overhead for the Huffman code, so it's
* impossible to compress 256 bytes or less of data to less than the
if (uncompressed_len < XPRESS_NUM_SYMBOLS / 2 + 1 + 4)
return 0;
+ if (uncompressed_len <= stack_max) {
+ matches = alloca(uncompressed_len * sizeof(matches[0]));
+ udata = alloca(uncompressed_len + 8);
+ prev_tab = alloca(uncompressed_len * sizeof(prev_tab[0]));
+ } else {
+ matches = MALLOC(uncompressed_len * sizeof(matches[0]));
+ udata = MALLOC(uncompressed_len + 8);
+ prev_tab = MALLOC(uncompressed_len * sizeof(prev_tab[0]));
+ if (matches == NULL || udata == NULL || prev_tab == NULL) {
+ WARNING("Failed to allocate memory for compression...");
+ compressed_len = 0;
+ goto out_free;
+ }
+ }
+
/* Copy the data to a temporary buffer, but only to avoid
* inconsequential accesses of uninitialized memory in
* lz_analyze_block(). */
xpress_record_match,
xpress_record_literal,
&record_ctx,
- &xpress_lz_params);
+ &xpress_lz_params,
+ prev_tab);
num_matches = (record_ctx.matches - matches);
/* Flush any pending data and get the length of the compressed data. */
compressed_len = flush_output_bitstream(&ostream);
- if (compressed_len == ~(input_idx_t)0)
- return 0;
+ if (compressed_len == ~(input_idx_t)0) {
+ compressed_len = 0;
+ goto out_free;
+ }
compressed_len += XPRESS_NUM_SYMBOLS / 2;
#if defined(ENABLE_XPRESS_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION) || 1
ERROR("Failed to decompress data we "
"compressed using XPRESS algorithm");
wimlib_assert(0);
- return 0;
+ compressed_len = 0;
+ goto out_free;
}
if (memcmp(uncompressed_data, udata, uncompressed_len)) {
ERROR("Data we compressed using XPRESS algorithm "
"didn't decompress to original");
wimlib_assert(0);
- return 0;
+ compressed_len = 0;
+ goto out_free;
}
#endif
+
+out_free:
+ if (uncompressed_len > stack_max) {
+ FREE(matches);
+ FREE(udata);
+ FREE(prev_tab);
+ }
return compressed_len;
}