X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fresource.c;h=f7684b10baee9bab6fffaa109f0977e79a19d1ed;hp=1539460e670496cf5c9fa5b705905c05fa891011;hb=c5a50ca10987e37b7e880f71f342fd78270f27a0;hpb=c5746b5e79df3d5c129f6185cf5fa130ab0512df diff --git a/src/resource.c b/src/resource.c index 1539460e..f7684b10 100644 --- a/src/resource.c +++ b/src/resource.c @@ -1,7 +1,7 @@ /* * resource.c * - * Read uncompressed and compressed metadata and file resources from a WIM file. + * Code for reading streams and resources, including compressed WIM resources. */ /* @@ -27,15 +27,17 @@ #endif #include "wimlib.h" +#include "wimlib/assert.h" #include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/file_io.h" #include "wimlib/lookup_table.h" #include "wimlib/resource.h" #include "wimlib/sha1.h" +#include "wimlib/wim.h" #ifdef __WIN32__ -/* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */ +/* for read_winnt_file_prefix(), read_win32_encrypted_file_prefix() */ # include "wimlib/win32.h" #endif @@ -90,59 +92,47 @@ */ -/* Decompress the specified chunk that uses the specified compression type - * @ctype, part of a WIM with default chunk size @wim_chunk_size. For LZX the - * separate @wim_chunk_size is needed because it determines the window size used - * for LZX compression. */ -static int -decompress(const void *cchunk, unsigned clen, void *uchunk, unsigned ulen, - int ctype, u32 wim_chunk_size) -{ - switch (ctype) { - case WIMLIB_COMPRESSION_TYPE_LZX: - return wimlib_lzx_decompress2(cchunk, clen, - uchunk, ulen, wim_chunk_size); - case WIMLIB_COMPRESSION_TYPE_XPRESS: - return wimlib_xpress_decompress(cchunk, clen, - uchunk, ulen); - case WIMLIB_COMPRESSION_TYPE_LZMS: - return wimlib_lzms_decompress(cchunk, clen, uchunk, ulen); - default: - wimlib_assert(0); - return -1; - } -} - struct data_range { u64 offset; u64 size; }; -/* Alternate chunk table format for resources with WIM_RESHDR_FLAG_CONCAT set. 
+/* + * read_compressed_wim_resource() - + * + * Read data from a compressed WIM resource. + * + * @rspec + * Specification of the compressed WIM resource to read from. + * @ranges + * Nonoverlapping, nonempty ranges of the uncompressed resource data to + * read, sorted by increasing offset. + * @num_ranges + * Number of ranges in @ranges; must be at least 1. + * @cb + * Callback function to feed the data being read. Each call provides the + * next chunk of the requested data, uncompressed. Each chunk will be of + * nonzero size and will not cross range boundaries, but otherwise will be + * of unspecified size. + * @cb_ctx + * Parameter to pass to @cb. + * + * Possible return values: + * + * WIMLIB_ERR_SUCCESS (0) + * WIMLIB_ERR_READ (errno set) + * WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to 0) + * WIMLIB_ERR_NOMEM (errno set to ENOMEM) + * WIMLIB_ERR_DECOMPRESSION (errno set to EINVAL) + * + * or other error code returned by the @cb function. */ -struct alt_chunk_table_header_disk { - /* Uncompressed size of the resource. */ - le64 res_usize; - - /* Number of bytes each compressed chunk decompresses into, except - * possibly the last which decompresses into the remainder. */ - le32 chunk_size; - - /* ??? */ - le32 unknown; - - /* This header is directly followed by a table of compressed sizes of - * the chunks. */ -} _packed_attribute; - -/* Read data from a compressed WIM resource. 
*/ static int read_compressed_wim_resource(const struct wim_resource_spec * const rspec, const struct data_range * const ranges, const size_t num_ranges, const consume_data_callback_t cb, - void * const cb_ctx, - const bool raw_chunks_mode) + void * const cb_ctx) { int ret; int errno_save; @@ -153,14 +143,17 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, bool chunk_offsets_malloced = false; bool ubuf_malloced = false; bool cbuf_malloced = false; + struct wimlib_decompressor *decompressor = NULL; /* Sanity checks */ wimlib_assert(rspec != NULL); - wimlib_assert(rspec->ctype != WIMLIB_COMPRESSION_TYPE_NONE); - wimlib_assert(is_power_of_2(rspec->cchunk_size)); + wimlib_assert(resource_is_compressed(rspec)); wimlib_assert(cb != NULL); wimlib_assert(num_ranges != 0); for (size_t i = 0; i < num_ranges; i++) { + DEBUG("Range %zu/%zu: %"PRIu64"@+%"PRIu64" / %"PRIu64, + i + 1, num_ranges, ranges[i].size, ranges[i].offset, + rspec->uncompressed_size); wimlib_assert(ranges[i].size != 0); wimlib_assert(ranges[i].offset + ranges[i].size >= ranges[i].size); wimlib_assert(ranges[i].offset + ranges[i].size <= rspec->uncompressed_size); @@ -176,35 +169,52 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, struct filedes * const in_fd = &rspec->wim->in_fd; /* Determine if we're reading a pipable resource from a pipe or not. */ - const bool is_pipe_read = !filedes_is_seekable(in_fd); + const bool is_pipe_read = (rspec->is_pipable && !filedes_is_seekable(in_fd)); - /* Determine if the chunk table is in an altenate format. */ - const bool alt_chunk_table = (rspec->flags & WIM_RESHDR_FLAG_CONCAT) && !is_pipe_read; + /* Determine if the chunk table is in an alternate format. */ + const bool alt_chunk_table = (rspec->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) + && !is_pipe_read; /* Get the maximum size of uncompressed chunks in this resource, which * we require be a power of 2. 
*/ - u32 chunk_size; u64 cur_read_offset = rspec->offset_in_wim; + int ctype = rspec->compression_type; + u32 chunk_size = rspec->chunk_size; if (alt_chunk_table) { - /* Alternate chunk table format. */ - struct alt_chunk_table_header_disk hdr; - - ret = full_pread(in_fd, &hdr, sizeof(hdr), cur_read_offset); - if (ret) - goto read_error; - cur_read_offset += sizeof(hdr); + /* Alternate chunk table format. Its header specifies the chunk + * size and compression format. Note: it could be read here; + * however, the relevant data was already loaded into @rspec by + * read_wim_lookup_table(). */ + cur_read_offset += sizeof(struct alt_chunk_table_header_disk); + } - chunk_size = le32_to_cpu(hdr.chunk_size); + if (!is_power_of_2(chunk_size)) { + ERROR("Invalid compressed resource: " + "expected power-of-2 chunk size (got %"PRIu32")", + chunk_size); + ret = WIMLIB_ERR_INVALID_CHUNK_SIZE; + errno = EINVAL; + goto out_free_memory; + } - if (!is_power_of_2(chunk_size)) { - ERROR("Invalid compressed resource: " - "expected power-of-2 chunk size (got %u)", chunk_size); - ret = WIMLIB_ERR_INVALID_CHUNK_SIZE; + /* Get valid decompressor. */ + if (ctype == rspec->wim->decompressor_ctype && + chunk_size == rspec->wim->decompressor_max_block_size) + { + /* Cached decompressor. */ + decompressor = rspec->wim->decompressor; + rspec->wim->decompressor_ctype = WIMLIB_COMPRESSION_TYPE_NONE; + rspec->wim->decompressor = NULL; + } else { + ret = wimlib_create_decompressor(ctype, chunk_size, NULL, + &decompressor); + if (ret) { + if (ret != WIMLIB_ERR_NOMEM) + errno = EINVAL; goto out_free_memory; } - } else { - chunk_size = rspec->cchunk_size; } + const u32 chunk_order = bsr32(chunk_size); /* Calculate the total number of chunks the resource is divided into. */ @@ -233,16 +243,15 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, const u64 num_chunk_entries = (alt_chunk_table ? 
num_chunks : num_chunks - 1); /* Set the size of each chunk table entry based on the resource's - * uncompressed size. XXX: Does the alternate chunk table really - * always have 4-byte entries? */ - const u64 chunk_entry_size = - (rspec->uncompressed_size > (1ULL << 32) && !alt_chunk_table) - ? 8 : 4; + * uncompressed size. */ + const u64 chunk_entry_size = get_chunk_entry_size(rspec->uncompressed_size, + alt_chunk_table); /* Calculate the size of the chunk table in bytes. */ const u64 chunk_table_size = num_chunk_entries * chunk_entry_size; - /* Includes header */ + /* Calculate the size of the chunk table in bytes, including the header + * in the case of the alternate chunk table format. */ const u64 chunk_table_full_size = (alt_chunk_table) ? chunk_table_size + sizeof(struct alt_chunk_table_header_disk) : chunk_table_size; @@ -310,7 +319,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, chunk_offsets_alloc_size - chunk_table_size_to_read; - ret = full_pread(in_fd, chunk_table_data, chunk_table_size, + ret = full_pread(in_fd, chunk_table_data, chunk_table_size_to_read, file_offset_of_needed_chunk_entries); if (ret) goto read_error; @@ -332,6 +341,8 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, *chunk_offsets_p++ = cur_offset; cur_offset += entry; } + if (last_needed_chunk < num_chunks - 1) + *chunk_offsets_p = cur_offset; } else { if (read_start_chunk == 0) *chunk_offsets_p++ = 0; @@ -365,27 +376,26 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, ubuf_malloced = true; } - /* Unless the raw compressed data was requested, allocate a temporary - * buffer for reading compressed chunks, each of which can be at most - * @chunk_size - 1 bytes. This excludes compressed chunks that are a - * full @chunk_size bytes, which are actually stored uncompressed. 
*/ - if (!raw_chunks_mode) { - if (chunk_size - 1 <= STACK_MAX) { - cbuf = alloca(chunk_size - 1); - } else { - cbuf = MALLOC(chunk_size - 1); - if (cbuf == NULL) - goto oom; - cbuf_malloced = true; - } + /* Allocate a temporary buffer for reading compressed chunks, each of + * which can be at most @chunk_size - 1 bytes. This excludes compressed + * chunks that are a full @chunk_size bytes, which are actually stored + * uncompressed. */ + if (chunk_size - 1 <= STACK_MAX) { + cbuf = alloca(chunk_size - 1); + } else { + cbuf = MALLOC(chunk_size - 1); + if (cbuf == NULL) + goto oom; + cbuf_malloced = true; } - /* Read and process each needed chunk. */ + /* Set current data range. */ const struct data_range *cur_range = ranges; const struct data_range * const end_range = &ranges[num_ranges]; u64 cur_range_pos = cur_range->offset; u64 cur_range_end = cur_range->offset + cur_range->size; + /* Read and process each needed chunk. */ for (u64 i = read_start_chunk; i <= last_needed_chunk; i++) { /* Calculate uncompressed size of next chunk. */ @@ -426,7 +436,8 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, if (rspec->is_pipable) cur_read_offset += sizeof(struct pwm_chunk_hdr); - /* Uncompressed offsets */ + /* Offsets in the uncompressed resource at which this chunk + * starts and ends. */ const u64 chunk_start_offset = i << chunk_order; const u64 chunk_end_offset = chunk_start_offset + chunk_usize; @@ -434,7 +445,6 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, /* The next range does not require data in this chunk, * so skip it. */ - cur_read_offset += chunk_csize; if (is_pipe_read) { u8 dummy; @@ -447,54 +457,59 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, /* Read the chunk and feed data to the callback * function. 
*/ - u8 *cb_buf; + u8 *read_buf; + + if (chunk_csize == chunk_usize) + read_buf = ubuf; + else + read_buf = cbuf; ret = full_pread(in_fd, - cbuf, + read_buf, chunk_csize, cur_read_offset); if (ret) goto read_error; - if (chunk_csize != chunk_usize && !raw_chunks_mode) { - ret = decompress(cbuf, - chunk_csize, - ubuf, - chunk_usize, - rspec->ctype, - chunk_size); + if (read_buf == cbuf) { + DEBUG("Decompressing chunk %"PRIu64" " + "(csize=%"PRIu32" usize=%"PRIu32")", + i, chunk_csize, chunk_usize); + ret = wimlib_decompress(cbuf, + chunk_csize, + ubuf, + chunk_usize, + decompressor); if (ret) { ERROR("Failed to decompress data!"); ret = WIMLIB_ERR_DECOMPRESSION; errno = EINVAL; goto out_free_memory; } - cb_buf = ubuf; - } else { - cb_buf = cbuf; } cur_read_offset += chunk_csize; - /* At least one range requires data in this chunk. - * However, the data fed to the callback function must - * not overlap range boundaries. */ + /* At least one range requires data in this chunk. */ do { size_t start, end, size; + /* Calculate how many bytes of data should be + * sent to the callback function, taking into + * account that data sent to the callback + * function must not overlap range boundaries. + */ start = cur_range_pos - chunk_start_offset; end = min(cur_range_end, chunk_end_offset) - chunk_start_offset; size = end - start; - if (raw_chunks_mode) - ret = (*cb)(&cb_buf[0], chunk_csize, cb_ctx); - else - ret = (*cb)(&cb_buf[start], size, cb_ctx); + ret = (*cb)(&ubuf[start], size, cb_ctx); if (ret) goto out_free_memory; cur_range_pos += size; if (cur_range_pos == cur_range_end) { + /* Advance to next range. 
*/ if (++cur_range == end_range) { cur_range_pos = ~0ULL; } else { @@ -506,21 +521,30 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, } } - if (is_pipe_read - && last_offset == rspec->uncompressed_size - 1 - && chunk_table_size) + if (is_pipe_read && + last_offset == rspec->uncompressed_size - 1 && + chunk_table_size) { u8 dummy; - /* Skip chunk table at end of pipable resource. */ - + /* If reading a pipable resource from a pipe and the full data + * was requested, skip the chunk table at the end so that the + * file descriptor is fully clear of the resource after this + * returns. */ cur_read_offset += chunk_table_size; ret = full_pread(in_fd, &dummy, 1, cur_read_offset - 1); if (ret) goto read_error; } ret = 0; + out_free_memory: errno_save = errno; + if (decompressor) { + wimlib_free_decompressor(rspec->wim->decompressor); + rspec->wim->decompressor = decompressor; + rspec->wim->decompressor_ctype = ctype; + rspec->wim->decompressor_max_block_size = chunk_size; + } if (chunk_offsets_malloced) FREE(chunk_offsets); if (ubuf_malloced) @@ -532,129 +556,72 @@ out_free_memory: oom: ERROR("Not enough memory available to read size=%"PRIu64" bytes " - "from compressed resource!", last_offset - first_offset + 1); + "from compressed WIM resource!", last_offset - first_offset + 1); errno = ENOMEM; ret = WIMLIB_ERR_NOMEM; goto out_free_memory; read_error: - ERROR_WITH_ERRNO("Error reading compressed file resource!"); + ERROR_WITH_ERRNO("Error reading compressed WIM resource!"); goto out_free_memory; } -/* Read raw data from a file descriptor at the specified offset. 
*/ static int -read_raw_file_data(struct filedes *in_fd, u64 size, consume_data_callback_t cb, - u32 cb_chunk_size, void *ctx_or_buf, u64 offset) +fill_zeroes(u64 size, consume_data_callback_t cb, void *cb_ctx) { - int ret; - u8 *tmp_buf; - bool tmp_buf_malloced = false; + if (unlikely(size)) { + u8 buf[min(size, BUFFER_SIZE)]; - if (cb) { - /* Send data to callback function in chunks. */ - if (cb_chunk_size <= STACK_MAX) { - tmp_buf = alloca(cb_chunk_size); - } else { - tmp_buf = MALLOC(cb_chunk_size); - if (tmp_buf == NULL) { - ret = WIMLIB_ERR_NOMEM; - goto out; - } - tmp_buf_malloced = true; - } + memset(buf, 0, sizeof(buf)); - while (size) { - size_t bytes_to_read = min(cb_chunk_size, size); - ret = full_pread(in_fd, tmp_buf, bytes_to_read, - offset); - if (ret) - goto read_error; - ret = cb(tmp_buf, bytes_to_read, ctx_or_buf); + do { + size_t len; + int ret; + + len = min(size, BUFFER_SIZE); + ret = cb(buf, len, cb_ctx); if (ret) - goto out; - size -= bytes_to_read; - offset += bytes_to_read; - } - } else { - /* Read data directly into buffer. */ - ret = full_pread(in_fd, ctx_or_buf, size, offset); - if (ret) - goto read_error; + return ret; + size -= len; + } while (size); } - ret = 0; - goto out; - -read_error: - ERROR_WITH_ERRNO("Read error"); -out: - if (tmp_buf_malloced) - FREE(tmp_buf); - return ret; + return 0; } +/* Read raw data from a file descriptor at the specified offset, feeding the + * data in chunks into the specified callback function. 
*/ static int -bufferer_cb(const void *chunk, size_t size, void *_ctx) +read_raw_file_data(struct filedes *in_fd, u64 offset, u64 size, + consume_data_callback_t cb, void *cb_ctx) { - u8 **buf_p = _ctx; + u8 buf[BUFFER_SIZE]; + size_t bytes_to_read; + int ret; - *buf_p = mempcpy(*buf_p, chunk, size); + while (size) { + bytes_to_read = min(sizeof(buf), size); + ret = full_pread(in_fd, buf, bytes_to_read, offset); + if (ret) { + ERROR_WITH_ERRNO("Read error"); + return ret; + } + ret = cb(buf, bytes_to_read, cb_ctx); + if (ret) + return ret; + size -= bytes_to_read; + offset += bytes_to_read; + } return 0; } -struct rechunker_context { - u8 *buffer; - u32 buffer_filled; - u32 cb_chunk_size; - - const struct data_range *ranges; - size_t num_ranges; - size_t cur_range; - u64 range_bytes_remaining; - - consume_data_callback_t cb; - void *cb_ctx; -}; - +/* A consume_data_callback_t implementation that simply concatenates all chunks + * into a buffer. */ static int -rechunker_cb(const void *chunk, size_t size, void *_ctx) +bufferer_cb(const void *chunk, size_t size, void *_ctx) { - struct rechunker_context *ctx = _ctx; - const u8 *chunkptr = chunk; - size_t bytes_to_copy; - int ret; - - wimlib_assert(ctx->cur_range != ctx->num_ranges); - - while (size) { - bytes_to_copy = size; - - if (bytes_to_copy > ctx->cb_chunk_size - ctx->buffer_filled) - bytes_to_copy = ctx->cb_chunk_size - ctx->buffer_filled; - - if (bytes_to_copy > ctx->range_bytes_remaining - ctx->buffer_filled) - bytes_to_copy = ctx->range_bytes_remaining - ctx->buffer_filled; - - memcpy(&ctx->buffer[ctx->buffer_filled], chunkptr, bytes_to_copy); - - ctx->buffer_filled += bytes_to_copy; - chunkptr += bytes_to_copy; - size -= bytes_to_copy; - ctx->range_bytes_remaining -= bytes_to_copy; - - if (ctx->buffer_filled == ctx->cb_chunk_size || - ctx->range_bytes_remaining == 0) - { - ret = (*ctx->cb)(ctx->buffer, ctx->buffer_filled, ctx->cb_ctx); - if (ret) - return ret; - ctx->buffer_filled = 0; + u8 **buf_p = _ctx; - 
if (ctx->range_bytes_remaining == 0 && - ++ctx->cur_range != ctx->num_ranges) - ctx->range_bytes_remaining = ctx->ranges[ctx->cur_range].size; - } - } + *buf_p = mempcpy(*buf_p, chunk, size); return 0; } @@ -662,23 +629,21 @@ rechunker_cb(const void *chunk, size_t size, void *_ctx) * read_partial_wim_resource()- * * Read a range of data from an uncompressed or compressed resource in a WIM - * file. Data is written into a buffer or fed into a callback function, as - * documented in read_stream_prefix(). + * file. * - * By default, this function provides the uncompressed data of the resource, and - * @size and @offset and interpreted relative to the uncompressed contents of - * the resource. This behavior can be modified by either of the following - * flags: - * - * WIMLIB_READ_RESOURCE_FLAG_RAW_FULL: - * Read @size bytes at @offset of the raw contents of the compressed - * resource. In the case of pipable resources, this excludes the stream - * header. Exclusive with WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS. - * - * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS: - * Read the raw compressed chunks of the compressed resource. @size must - * be the full uncompressed size, @offset must be 0, and @cb_chunk_size - * must be the resource chunk size. + * @rspec + * Specification of the WIM resource to read from. + * @offset + * Offset within the uncompressed resource at which to start reading. + * @size + * Number of bytes to read. + * @cb + * Callback function to feed the data being read. Each call provides the + * next chunk of the requested data, uncompressed. Each chunk will be of + * nonzero size and will not cross range boundaries, but otherwise will be + * of unspecified size. + * @cb_ctx + * Parameter to pass to @cb. * * Return values: * WIMLIB_ERR_SUCCESS (0) @@ -689,138 +654,114 @@ rechunker_cb(const void *chunk, size_t size, void *_ctx) * * or other error code returned by the @cb function. 
*/ -int -read_partial_wim_resource(const struct wim_lookup_table_entry *lte, - u64 size, consume_data_callback_t cb, - u32 cb_chunk_size, void *ctx_or_buf, - int flags, u64 offset) +static int +read_partial_wim_resource(const struct wim_resource_spec *rspec, + u64 offset, u64 size, + consume_data_callback_t cb, void *cb_ctx) { - const struct wim_resource_spec *rspec; - struct filedes *in_fd; - - /* Verify parameters. */ - wimlib_assert(lte->resource_location == RESOURCE_IN_WIM); - rspec = lte->rspec; - in_fd = &rspec->wim->in_fd; - if (cb) - wimlib_assert(is_power_of_2(cb_chunk_size)); - if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) { - /* Raw chunks mode is subject to the restrictions noted. */ - wimlib_assert(!lte_is_partial(lte)); - wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL)); - wimlib_assert(cb_chunk_size == rspec->cchunk_size); - wimlib_assert(size == lte->size); - wimlib_assert(offset == 0); - } else if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) { - /* Raw full mode: read must not overrun end of store size. */ - wimlib_assert(!lte_is_partial(lte)); - wimlib_assert(offset + size >= size && - offset + size <= rspec->size_in_wim); + /* Sanity checks. */ + wimlib_assert(offset + size >= offset); + wimlib_assert(offset + size <= rspec->uncompressed_size); + + DEBUG("Reading %"PRIu64" @ %"PRIu64" from WIM resource " + "%"PRIu64" => %"PRIu64" @ %"PRIu64, + size, offset, rspec->uncompressed_size, + rspec->size_in_wim, rspec->offset_in_wim); + + /* Trivial case. */ + if (size == 0) + return 0; + + if (resource_is_compressed(rspec)) { + struct data_range range = { + .offset = offset, + .size = size, + }; + return read_compressed_wim_resource(rspec, &range, 1, + cb, cb_ctx); } else { - /* Normal mode: read must not overrun end of original size. */ - wimlib_assert(offset + size >= size && - offset + size <= lte->size); - } + /* Reading uncompressed resource. For completeness, handle the + * weird case where size_in_wim < uncompressed_size. 
*/ - DEBUG("Reading WIM resource: %"PRIu64" @ +%"PRIu64"[+%"PRIu64"] " - "from %"PRIu64"(%"PRIu64") @ +%"PRIu64" " - "(readflags 0x%08x, resflags 0x%02x%s)", - size, offset, lte->offset_in_res, - rspec->size_in_wim, - rspec->uncompressed_size, - rspec->offset_in_wim, - flags, lte->flags, - (rspec->is_pipable ? ", pipable" : "")); - - if ((flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) || - rspec->ctype == WIMLIB_COMPRESSION_TYPE_NONE) - { - return read_raw_file_data(in_fd, - size, - cb, - cb_chunk_size, - ctx_or_buf, - rspec->offset_in_wim + lte->offset_in_res + offset); - } else { - bool raw_chunks; - struct data_range range; - consume_data_callback_t internal_cb; - void *internal_cb_ctx; - u8 *buf; - bool rechunker_buf_malloced = false; - struct rechunker_context *rechunker_ctx; + u64 read_size; + u64 zeroes_size; int ret; - if (size == 0) - return 0; - - range.offset = lte->offset_in_res + offset; - range.size = size; - raw_chunks = !!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS); - - if (cb != NULL && - cb_chunk_size == rspec->cchunk_size && - !(rspec->flags & WIM_RESHDR_FLAG_CONCAT)) + if (likely(offset + size <= rspec->size_in_wim) || + rspec->is_pipable) { - internal_cb = cb; - internal_cb_ctx = ctx_or_buf; - } else if (cb == NULL) { - buf = ctx_or_buf; - internal_cb = bufferer_cb; - internal_cb_ctx = &buf; + read_size = size; + zeroes_size = 0; } else { - rechunker_ctx = alloca(sizeof(struct rechunker_context)); - - if (cb_chunk_size <= STACK_MAX) { - rechunker_ctx->buffer = alloca(cb_chunk_size); + if (offset >= rspec->size_in_wim) { + read_size = 0; + zeroes_size = size; } else { - rechunker_ctx->buffer = MALLOC(cb_chunk_size); - if (rechunker_ctx->buffer == NULL) - return WIMLIB_ERR_NOMEM; - rechunker_buf_malloced = true; + read_size = rspec->size_in_wim - offset; + zeroes_size = offset + size - rspec->size_in_wim; } - rechunker_ctx->buffer_filled = 0; - rechunker_ctx->cb_chunk_size = cb_chunk_size; - - rechunker_ctx->ranges = &range; - rechunker_ctx->num_ranges 
= 1; - rechunker_ctx->cur_range = 0; - rechunker_ctx->range_bytes_remaining = range.size; - - rechunker_ctx->cb = cb; - rechunker_ctx->cb_ctx = ctx_or_buf; - - internal_cb = rechunker_cb; - internal_cb_ctx = rechunker_ctx; } - ret = read_compressed_wim_resource(rspec, &range, 1, - internal_cb, internal_cb_ctx, - raw_chunks); - if (rechunker_buf_malloced) - FREE(rechunker_ctx->buffer); + ret = read_raw_file_data(&rspec->wim->in_fd, + rspec->offset_in_wim + offset, + read_size, + cb, + cb_ctx); + if (ret) + return ret; - return ret; + return fill_zeroes(zeroes_size, cb, cb_ctx); } } +/* Read the specified range of uncompressed data from the specified stream, + * which must be located into a WIM file, into the specified buffer. */ int read_partial_wim_stream_into_buf(const struct wim_lookup_table_entry *lte, - size_t size, u64 offset, void *buf) + size_t size, u64 offset, void *_buf) +{ + u8 *buf = _buf; + + wimlib_assert(lte->resource_location == RESOURCE_IN_WIM); + + return read_partial_wim_resource(lte->rspec, + lte->offset_in_res + offset, + size, + bufferer_cb, + &buf); +} + +/* A consume_data_callback_t implementation that simply ignores the data + * received. */ +static int +skip_chunk_cb(const void *chunk, size_t size, void *_ctx) +{ + return 0; +} + +/* Skip over the data of the specified stream, which must correspond to a full + * WIM resource. 
*/ +int +skip_wim_stream(struct wim_lookup_table_entry *lte) { - return read_partial_wim_resource(lte, size, NULL, 0, buf, 0, offset); + wimlib_assert(lte->resource_location == RESOURCE_IN_WIM); + wimlib_assert(!(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS)); + DEBUG("Skipping stream (size=%"PRIu64")", lte->size); + return read_partial_wim_resource(lte->rspec, + 0, + lte->rspec->uncompressed_size, + skip_chunk_cb, + NULL); } static int read_wim_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, u32 cb_chunk_size, - void *ctx_or_buf, int flags) + consume_data_callback_t cb, void *cb_ctx) { - return read_partial_wim_resource(lte, size, cb, cb_chunk_size, - ctx_or_buf, flags, 0); + return read_partial_wim_resource(lte->rspec, lte->offset_in_res, size, + cb, cb_ctx); } -#ifndef __WIN32__ /* This function handles reading stream data that is located in an external * file, such as a file that has been added to the WIM image through execution * of a wimlib_add_command. @@ -828,65 +769,72 @@ read_wim_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, * This assumes the file can be accessed using the standard POSIX open(), * read(), and close(). On Windows this will not necessarily be the case (since * the file may need FILE_FLAG_BACKUP_SEMANTICS to be opened, or the file may be - * encrypted), so Windows uses its own code for its equivalent case. - */ + * encrypted), so Windows uses its own code for its equivalent case. 
*/ static int read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, u32 cb_chunk_size, - void *ctx_or_buf, int _ignored_flags) + consume_data_callback_t cb, void *cb_ctx) { int ret; int raw_fd; struct filedes fd; wimlib_assert(size <= lte->size); + DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"", size, lte->file_on_disk); - raw_fd = open(lte->file_on_disk, O_BINARY | O_RDONLY); + raw_fd = topen(lte->file_on_disk, O_BINARY | O_RDONLY); if (raw_fd < 0) { ERROR_WITH_ERRNO("Can't open \"%"TS"\"", lte->file_on_disk); return WIMLIB_ERR_OPEN; } filedes_init(&fd, raw_fd); - ret = read_raw_file_data(&fd, size, cb, cb_chunk_size, ctx_or_buf, 0); + ret = read_raw_file_data(&fd, 0, size, cb, cb_ctx); filedes_close(&fd); return ret; } -#endif /* !__WIN32__ */ -/* This function handles the trivial case of reading stream data that is, in - * fact, already located in an in-memory buffer. */ +#ifdef WITH_FUSE static int -read_buffer_prefix(const struct wim_lookup_table_entry *lte, - u64 size, consume_data_callback_t cb, - u32 cb_chunk_size, void *ctx_or_buf, int _ignored_flags) +read_staging_file_prefix(const struct wim_lookup_table_entry *lte, u64 size, + consume_data_callback_t cb, void *cb_ctx) { + int raw_fd; + struct filedes fd; + int ret; + wimlib_assert(size <= lte->size); - if (cb) { - /* Feed the data into the callback function in - * appropriately-sized chunks. */ - int ret; - u32 chunk_size; + DEBUG("Reading %"PRIu64" bytes from staging file \"%s\"", + size, lte->staging_file_name); - for (u64 offset = 0; offset < size; offset += chunk_size) { - chunk_size = min(cb_chunk_size, size - offset); - ret = cb((const u8*)lte->attached_buffer + offset, - chunk_size, ctx_or_buf); - if (ret) - return ret; - } - } else { - /* Copy the data directly into the specified buffer. 
*/ - memcpy(ctx_or_buf, lte->attached_buffer, size); + raw_fd = openat(lte->staging_dir_fd, lte->staging_file_name, + O_RDONLY | O_NOFOLLOW); + if (raw_fd < 0) { + ERROR_WITH_ERRNO("Can't open staging file \"%s\"", + lte->staging_file_name); + return WIMLIB_ERR_OPEN; } - return 0; + filedes_init(&fd, raw_fd); + ret = read_raw_file_data(&fd, 0, size, cb, cb_ctx); + filedes_close(&fd); + return ret; +} +#endif + +/* This function handles the trivial case of reading stream data that is, in + * fact, already located in an in-memory buffer. */ +static int +read_buffer_prefix(const struct wim_lookup_table_entry *lte, + u64 size, consume_data_callback_t cb, void *cb_ctx) +{ + wimlib_assert(size <= lte->size); + return (*cb)(lte->attached_buffer, size, cb_ctx); } typedef int (*read_stream_prefix_handler_t)(const struct wim_lookup_table_entry *lte, - u64 size, consume_data_callback_t cb, - u32 cb_chunk_size, void *ctx_or_buf, - int flags); + u64 size, + consume_data_callback_t cb, + void *cb_ctx); /* * read_stream_prefix()- @@ -895,68 +843,49 @@ typedef int (*read_stream_prefix_handler_t)(const struct wim_lookup_table_entry * any one of several locations, such as in a WIM file (compressed or * uncompressed), in an external file, or directly in an in-memory buffer. * - * This function feeds the data either to a callback function (@cb != NULL, - * passing it @ctx_or_buf), or write it directly into a buffer (@cb == NULL, - * @ctx_or_buf specifies the buffer, which must have room for at least @size - * bytes). - * - * When (@cb != NULL), @cb_chunk_size specifies the maximum size of data chunks - * to feed the callback function. @cb_chunk_size must be positive, and if the - * stream is in a WIM file, must be a power of 2. All chunks, except possibly - * the last one, will be this size. If (@cb == NULL), @cb_chunk_size is - * ignored. - * - * If the stream is located in a WIM file, @flags can be set as documented in - * read_partial_wim_resource(). Otherwise @flags are ignored. 
+ * This function feeds the data to a callback function @cb in chunks of + * unspecified size. * * Returns 0 on success; nonzero on error. A nonzero value will be returned if * the stream data cannot be successfully read (for a number of different - * reasons, depending on the stream location), or if a callback function was - * specified and it returned nonzero. + * reasons, depending on the stream location), or if @cb returned nonzero in + * which case that error code will be returned. */ -int +static int read_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, u32 cb_chunk_size, - void *ctx_or_buf, int flags) + consume_data_callback_t cb, void *cb_ctx) { - /* This function merely verifies several preconditions, then passes - * control to an appropriate function for understanding each possible - * stream location. */ static const read_stream_prefix_handler_t handlers[] = { [RESOURCE_IN_WIM] = read_wim_stream_prefix, - #ifdef __WIN32__ - [RESOURCE_IN_FILE_ON_DISK] = read_win32_file_prefix, - #else [RESOURCE_IN_FILE_ON_DISK] = read_file_on_disk_prefix, - #endif [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix, #ifdef WITH_FUSE - [RESOURCE_IN_STAGING_FILE] = read_file_on_disk_prefix, + [RESOURCE_IN_STAGING_FILE] = read_staging_file_prefix, #endif #ifdef WITH_NTFS_3G [RESOURCE_IN_NTFS_VOLUME] = read_ntfs_file_prefix, #endif #ifdef __WIN32__ + [RESOURCE_IN_WINNT_FILE_ON_DISK] = read_winnt_file_prefix, [RESOURCE_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix, #endif }; wimlib_assert(lte->resource_location < ARRAY_LEN(handlers) && handlers[lte->resource_location] != NULL); - wimlib_assert(cb == NULL || cb_chunk_size > 0); - return handlers[lte->resource_location](lte, size, cb, cb_chunk_size, - ctx_or_buf, flags); + return handlers[lte->resource_location](lte, size, cb, cb_ctx); } /* Read the full uncompressed data of the specified stream into the specified * buffer, which must have space for at least lte->size bytes. 
*/ int -read_full_stream_into_buf(const struct wim_lookup_table_entry *lte, void *buf) +read_full_stream_into_buf(const struct wim_lookup_table_entry *lte, void *_buf) { - return read_stream_prefix(lte, lte->size, NULL, 0, buf, 0); + u8 *buf = _buf; + return read_stream_prefix(lte, lte->size, bufferer_cb, &buf); } -/* Read the full uncompressed data of the specified stream. A buffer sufficient - * to hold the data is allocated and returned in @buf_ret. */ +/* Retrieve the full uncompressed data of the specified stream. A buffer large + * enough hold the data is allocated and returned in @buf_ret. */ int read_full_stream_into_alloc_buf(const struct wim_lookup_table_entry *lte, void **buf_ret) @@ -984,7 +913,8 @@ read_full_stream_into_alloc_buf(const struct wim_lookup_table_entry *lte, return 0; } -/* Retrieve the full uncompressed data of the specified WIM resource. */ +/* Retrieve the full uncompressed data of the specified WIM resource. A buffer + * large enough hold the data is allocated and returned in @buf_ret. */ static int wim_resource_spec_to_data(struct wim_resource_spec *rspec, void **buf_ret) { @@ -995,7 +925,6 @@ wim_resource_spec_to_data(struct wim_resource_spec *rspec, void **buf_ret) if (lte == NULL) return WIMLIB_ERR_NOMEM; - lte->unhashed = 1; lte_bind_wim_resource_spec(lte, rspec); lte->flags = rspec->flags; lte->size = rspec->uncompressed_size; @@ -1008,71 +937,346 @@ wim_resource_spec_to_data(struct wim_resource_spec *rspec, void **buf_ret) return ret; } -/* Retrieve the full uncompressed data of the specified WIM resource. */ +/* Retrieve the full uncompressed data of a WIM resource specified as a raw + * `wim_reshdr' and the corresponding WIM file. A large enough hold the data is + * allocated and returned in @buf_ret. 
*/ int wim_reshdr_to_data(const struct wim_reshdr *reshdr, WIMStruct *wim, void **buf_ret) { DEBUG("offset_in_wim=%"PRIu64", size_in_wim=%"PRIu64", " "uncompressed_size=%"PRIu64, - reshdr->offset_in_wim, reshdr->size_in_wim, reshdr->uncompressed_size); + reshdr->offset_in_wim, reshdr->size_in_wim, + reshdr->uncompressed_size); struct wim_resource_spec rspec; wim_res_hdr_to_spec(reshdr, wim, &rspec); return wim_resource_spec_to_data(&rspec, buf_ret); } -struct read_stream_list_ctx { - read_stream_list_begin_stream_t begin_stream; - consume_data_callback_t consume_chunk; - read_stream_list_end_stream_t end_stream; - void *begin_stream_ctx; - void *consume_chunk_ctx; - void *end_stream_ctx; +int +wim_reshdr_to_hash(const struct wim_reshdr *reshdr, WIMStruct *wim, + u8 hash[SHA1_HASH_SIZE]) +{ + struct wim_resource_spec rspec; + int ret; + struct wim_lookup_table_entry *lte; + + wim_res_hdr_to_spec(reshdr, wim, &rspec); + + lte = new_lookup_table_entry(); + if (lte == NULL) + return WIMLIB_ERR_NOMEM; + + lte_bind_wim_resource_spec(lte, &rspec); + lte->flags = rspec.flags; + lte->size = rspec.uncompressed_size; + lte->offset_in_res = 0; + lte->unhashed = 1; + + ret = sha1_stream(lte); + + lte_unbind_wim_resource_spec(lte); + copy_hash(hash, lte->hash); + free_lookup_table_entry(lte); + return ret; +} + +struct streamifier_context { + struct read_stream_list_callbacks cbs; struct wim_lookup_table_entry *cur_stream; + struct wim_lookup_table_entry *next_stream; u64 cur_stream_offset; struct wim_lookup_table_entry *final_stream; size_t list_head_offset; }; +static struct wim_lookup_table_entry * +next_stream(struct wim_lookup_table_entry *lte, size_t list_head_offset) +{ + struct list_head *cur; + + cur = (struct list_head*)((u8*)lte + list_head_offset); + + return (struct wim_lookup_table_entry*)((u8*)cur->next - list_head_offset); +} + +/* A consume_data_callback_t implementation that translates raw resource data + * into streams, calling the begin_stream, consume_chunk, 
and end_stream + * callback functions as appropriate. */ static int -read_stream_list_wrapper_cb(const void *chunk, size_t size, void *_ctx) +streamifier_cb(const void *chunk, size_t size, void *_ctx) { - struct read_stream_list_ctx *ctx = _ctx; + struct streamifier_context *ctx = _ctx; int ret; + DEBUG("%zu bytes passed to streamifier", size); + + wimlib_assert(ctx->cur_stream != NULL); + wimlib_assert(size <= ctx->cur_stream->size - ctx->cur_stream_offset); + if (ctx->cur_stream_offset == 0) { + u32 flags; + /* Starting a new stream. */ - ret = (*ctx->begin_stream)(ctx->cur_stream, ctx->begin_stream_ctx); + DEBUG("Begin new stream (size=%"PRIu64").", + ctx->cur_stream->size); + + flags = BEGIN_STREAM_FLAG_PARTIAL_RESOURCE; + if (size == ctx->cur_stream->size) + flags |= BEGIN_STREAM_FLAG_WHOLE_STREAM; + ret = (*ctx->cbs.begin_stream)(ctx->cur_stream, + flags, + ctx->cbs.begin_stream_ctx); if (ret) return ret; } - ret = (*ctx->consume_chunk)(chunk, size, ctx->consume_chunk_ctx); + /* Consume the chunk. */ + ret = (*ctx->cbs.consume_chunk)(chunk, size, + ctx->cbs.consume_chunk_ctx); + ctx->cur_stream_offset += size; if (ret) return ret; - ctx->cur_stream_offset += size; - if (ctx->cur_stream_offset == ctx->cur_stream->size) { - /* Finished reading all the data for a stream; advance - * to the next one. */ - ret = (*ctx->end_stream)(ctx->cur_stream, ctx->end_stream_ctx); + /* Finished reading all the data for a stream. */ + + ctx->cur_stream_offset = 0; + + DEBUG("End stream (size=%"PRIu64").", ctx->cur_stream->size); + ret = (*ctx->cbs.end_stream)(ctx->cur_stream, 0, + ctx->cbs.end_stream_ctx); if (ret) return ret; - if (ctx->cur_stream == ctx->final_stream) - return 0; + /* Advance to next stream. 
*/ + ctx->cur_stream = ctx->next_stream; + if (ctx->cur_stream != NULL) { + if (ctx->cur_stream != ctx->final_stream) + ctx->next_stream = next_stream(ctx->cur_stream, + ctx->list_head_offset); + else + ctx->next_stream = NULL; + } + } + return 0; +} + +struct hasher_context { + SHA_CTX sha_ctx; + int flags; + struct read_stream_list_callbacks cbs; +}; - struct list_head *cur = (struct list_head *) - ((u8*)ctx->cur_stream + ctx->list_head_offset); - struct list_head *next = cur->next; +/* Callback for starting to read a stream while calculating its SHA1 message + * digest. */ +static int +hasher_begin_stream(struct wim_lookup_table_entry *lte, u32 flags, + void *_ctx) +{ + struct hasher_context *ctx = _ctx; - ctx->cur_stream = (struct wim_lookup_table_entry *) - ((u8*)next - ctx->list_head_offset); + sha1_init(&ctx->sha_ctx); - ctx->cur_stream_offset = 0; + if (ctx->cbs.begin_stream == NULL) + return 0; + else + return (*ctx->cbs.begin_stream)(lte, flags, + ctx->cbs.begin_stream_ctx); +} + +/* A consume_data_callback_t implementation that continues calculating the SHA1 + * message digest of the stream being read, then optionally passes the data on + * to another consume_data_callback_t implementation. This allows checking the + * SHA1 message digest of a stream being extracted, for example. */ +static int +hasher_consume_chunk(const void *chunk, size_t size, void *_ctx) +{ + struct hasher_context *ctx = _ctx; + + sha1_update(&ctx->sha_ctx, chunk, size); + if (ctx->cbs.consume_chunk == NULL) + return 0; + else + return (*ctx->cbs.consume_chunk)(chunk, size, ctx->cbs.consume_chunk_ctx); +} + +static void +get_sha1_string(const u8 md[SHA1_HASH_SIZE], tchar *str) +{ + for (size_t i = 0; i < SHA1_HASH_SIZE; i++) + str += tsprintf(str, T("%02x"), md[i]); +} + +/* Callback for finishing reading a stream while calculating its SHA1 message + * digest. 
*/ +static int +hasher_end_stream(struct wim_lookup_table_entry *lte, int status, void *_ctx) +{ + struct hasher_context *ctx = _ctx; + u8 hash[SHA1_HASH_SIZE]; + int ret; + + if (status) { + /* Error occurred; the full stream may not have been read. */ + ret = status; + goto out_next_cb; } - return 0; + + /* Retrieve the final SHA1 message digest. */ + sha1_final(hash, &ctx->sha_ctx); + + if (lte->unhashed) { + if (ctx->flags & COMPUTE_MISSING_STREAM_HASHES) { + /* No SHA1 message digest was previously present for the + * stream. Set it to the one just calculated. */ + DEBUG("Set SHA1 message digest for stream " + "(size=%"PRIu64").", lte->size); + copy_hash(lte->hash, hash); + } + } else { + if (ctx->flags & VERIFY_STREAM_HASHES) { + /* The stream already had a SHA1 message digest present. Verify + * that it is the same as the calculated value. */ + if (!hashes_equal(hash, lte->hash)) { + if (wimlib_print_errors) { + tchar expected_hashstr[SHA1_HASH_SIZE * 2 + 1]; + tchar actual_hashstr[SHA1_HASH_SIZE * 2 + 1]; + get_sha1_string(lte->hash, expected_hashstr); + get_sha1_string(hash, actual_hashstr); + ERROR("The stream is corrupted!\n" + " (Expected SHA1=%"TS",\n" + " got SHA1=%"TS")", + expected_hashstr, actual_hashstr); + } + ret = WIMLIB_ERR_INVALID_RESOURCE_HASH; + errno = EINVAL; + goto out_next_cb; + } + DEBUG("SHA1 message digest okay for " + "stream (size=%"PRIu64").", lte->size); + } + } + ret = 0; +out_next_cb: + if (ctx->cbs.end_stream == NULL) + return ret; + else + return (*ctx->cbs.end_stream)(lte, ret, ctx->cbs.end_stream_ctx); +} + +static int +read_full_stream_with_cbs(struct wim_lookup_table_entry *lte, + const struct read_stream_list_callbacks *cbs) +{ + int ret; + + ret = (*cbs->begin_stream)(lte, 0, cbs->begin_stream_ctx); + if (ret) + return ret; + + ret = read_stream_prefix(lte, lte->size, cbs->consume_chunk, + cbs->consume_chunk_ctx); + + return (*cbs->end_stream)(lte, ret, cbs->end_stream_ctx); +} + +/* Read the full data of the specified 
stream, passing the data into the + * specified callbacks (all of which are optional) and either checking or + * computing the SHA1 message digest of the stream. */ +static int +read_full_stream_with_sha1(struct wim_lookup_table_entry *lte, + const struct read_stream_list_callbacks *cbs) +{ + struct hasher_context hasher_ctx = { + .flags = VERIFY_STREAM_HASHES | COMPUTE_MISSING_STREAM_HASHES, + .cbs = *cbs, + }; + struct read_stream_list_callbacks hasher_cbs = { + .begin_stream = hasher_begin_stream, + .begin_stream_ctx = &hasher_ctx, + .consume_chunk = hasher_consume_chunk, + .consume_chunk_ctx = &hasher_ctx, + .end_stream = hasher_end_stream, + .end_stream_ctx = &hasher_ctx, + }; + return read_full_stream_with_cbs(lte, &hasher_cbs); +} + +static int +read_packed_streams(struct wim_lookup_table_entry *first_stream, + struct wim_lookup_table_entry *last_stream, + u64 stream_count, + size_t list_head_offset, + const struct read_stream_list_callbacks *sink_cbs) +{ + struct data_range *ranges; + bool ranges_malloced; + struct wim_lookup_table_entry *cur_stream; + size_t i; + int ret; + u64 ranges_alloc_size; + + DEBUG("Reading %"PRIu64" streams combined in same WIM resource", + stream_count); + + /* Setup data ranges array (one range per stream to read); this way + * read_compressed_wim_resource() does not need to be aware of streams. 
+ */ + + ranges_alloc_size = stream_count * sizeof(ranges[0]); + + if (unlikely((size_t)ranges_alloc_size != ranges_alloc_size)) { + ERROR("Too many streams in one resource!"); + return WIMLIB_ERR_NOMEM; + } + if (likely(ranges_alloc_size <= STACK_MAX)) { + ranges = alloca(ranges_alloc_size); + ranges_malloced = false; + } else { + ranges = MALLOC(ranges_alloc_size); + if (ranges == NULL) { + ERROR("Too many streams in one resource!"); + return WIMLIB_ERR_NOMEM; + } + ranges_malloced = true; + } + + for (i = 0, cur_stream = first_stream; + i < stream_count; + i++, cur_stream = next_stream(cur_stream, list_head_offset)) + { + ranges[i].offset = cur_stream->offset_in_res; + ranges[i].size = cur_stream->size; + } + + struct streamifier_context streamifier_ctx = { + .cbs = *sink_cbs, + .cur_stream = first_stream, + .next_stream = next_stream(first_stream, list_head_offset), + .cur_stream_offset = 0, + .final_stream = last_stream, + .list_head_offset = list_head_offset, + }; + + ret = read_compressed_wim_resource(first_stream->rspec, + ranges, + stream_count, + streamifier_cb, + &streamifier_ctx); + + if (ranges_malloced) + FREE(ranges); + + if (ret) { + if (streamifier_ctx.cur_stream_offset != 0) { + ret = (*streamifier_ctx.cbs.end_stream) + (streamifier_ctx.cur_stream, + ret, + streamifier_ctx.cbs.end_stream_ctx); + } + } + return ret; } /* @@ -1088,18 +1292,27 @@ read_stream_list_wrapper_cb(const void *chunk, size_t size, void *_ctx) * @list_head_offset * Offset of the `struct list_head' within each `struct * wim_lookup_table_entry' that makes up the @stream_list. - * @begin_stream - * Callback for starting to process a stream. - * @consume_chunk - * Callback for receiving a chunk of stream data. - * @end_stream - * Callback for finishing the processing of a stream. - * @cb_chunk_size - * Size of chunks to provide to @consume_chunk. For a given stream, all - * the chunks will be this size, except possibly the last which will be the - * remainder. 
- * @cb_ctx - * Parameter to pass to the callback functions. + * @cbs + * Callback functions to accept the stream data. + * @flags + * Bitwise OR of zero or more of the following flags: + * + * VERIFY_STREAM_HASHES: + * For all streams being read that have already had SHA1 message + * digests computed, calculate the SHA1 message digest of the read + * data and compare it with the previously computed value. If they + * do not match, return WIMLIB_ERR_INVALID_RESOURCE_HASH. + * + * COMPUTE_MISSING_STREAM_HASHES + * For all streams being read that have not yet had their SHA1 + * message digests computed, calculate and save their SHA1 message + * digests. + * + * STREAM_LIST_ALREADY_SORTED + * @stream_list is already sorted in sequential order for reading. + * + * The callback functions are allowed to delete the current stream from the list + * if necessary. * * Returns 0 on success; a nonzero error code on failure. Failure can occur due * to an error reading the data or due to an error status being returned by any @@ -1108,19 +1321,39 @@ read_stream_list_wrapper_cb(const void *chunk, size_t size, void *_ctx) int read_stream_list(struct list_head *stream_list, size_t list_head_offset, - read_stream_list_begin_stream_t begin_stream, - consume_data_callback_t consume_chunk, - read_stream_list_end_stream_t end_stream, - u32 cb_chunk_size, - void *cb_ctx) + const struct read_stream_list_callbacks *cbs, + int flags) { int ret; struct list_head *cur, *next; struct wim_lookup_table_entry *lte; + struct hasher_context *hasher_ctx; + struct read_stream_list_callbacks *sink_cbs; - ret = sort_stream_list_by_sequential_order(stream_list, list_head_offset); - if (ret) - return ret; + if (!(flags & STREAM_LIST_ALREADY_SORTED)) { + ret = sort_stream_list_by_sequential_order(stream_list, list_head_offset); + if (ret) + return ret; + } + + if (flags & (VERIFY_STREAM_HASHES | COMPUTE_MISSING_STREAM_HASHES)) { + hasher_ctx = alloca(sizeof(*hasher_ctx)); + *hasher_ctx = (struct 
hasher_context) { + .flags = flags, + .cbs = *cbs, + }; + sink_cbs = alloca(sizeof(*sink_cbs)); + *sink_cbs = (struct read_stream_list_callbacks) { + .begin_stream = hasher_begin_stream, + .begin_stream_ctx = hasher_ctx, + .consume_chunk = hasher_consume_chunk, + .consume_chunk_ctx = hasher_ctx, + .end_stream = hasher_end_stream, + .end_stream_ctx = hasher_ctx, + }; + } else { + sink_cbs = (struct read_stream_list_callbacks*)cbs; + } for (cur = stream_list->next, next = cur->next; cur != stream_list; @@ -1128,11 +1361,13 @@ read_stream_list(struct list_head *stream_list, { lte = (struct wim_lookup_table_entry*)((u8*)cur - list_head_offset); - if (lte_is_partial(lte)) { + if (lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS && + lte->size != lte->rspec->uncompressed_size) + { struct wim_lookup_table_entry *lte_next, *lte_last; struct list_head *next2; - size_t stream_count; + u64 stream_count; /* The next stream is a proper sub-sequence of a WIM * resource. See if there are other streams in the same @@ -1161,204 +1396,115 @@ read_stream_list(struct list_head *stream_list, * first stream in the resource that needs to be * read and @lte_last specifies the last stream * in the resource that needs to be read. 
*/ - next = next2; - - struct data_range ranges[stream_count]; - - { - struct list_head *next3; - size_t i; - struct wim_lookup_table_entry *lte_cur; - - next3 = cur; - for (i = 0; i < stream_count; i++) { - lte_cur = (struct wim_lookup_table_entry*) - ((u8*)next3 - list_head_offset); - ranges[i].offset = lte_cur->offset_in_res; - ranges[i].size = lte_cur->size; - next3 = next3->next; - } - } - - struct rechunker_context rechunker_ctx = { - .buffer = MALLOC(cb_chunk_size), - .buffer_filled = 0, - .cb_chunk_size = cb_chunk_size, - .ranges = ranges, - .num_ranges = stream_count, - .cur_range = 0, - .range_bytes_remaining = ranges[0].size, - .cb = consume_chunk, - .cb_ctx = cb_ctx, - }; - - if (rechunker_ctx.buffer == NULL) - return WIMLIB_ERR_NOMEM; - - struct read_stream_list_ctx ctx = { - .begin_stream = begin_stream, - .begin_stream_ctx = cb_ctx, - .consume_chunk = rechunker_cb, - .consume_chunk_ctx = &rechunker_ctx, - .end_stream = end_stream, - .end_stream_ctx = cb_ctx, - .cur_stream = lte, - .cur_stream_offset = 0, - .final_stream = lte_last, - .list_head_offset = list_head_offset, - }; - - ret = read_compressed_wim_resource(lte->rspec, - ranges, - stream_count, - read_stream_list_wrapper_cb, - &ctx, - false); - FREE(rechunker_ctx.buffer); + ret = read_packed_streams(lte, lte_last, + stream_count, + list_head_offset, + sink_cbs); if (ret) return ret; continue; } } - ret = (*begin_stream)(lte, cb_ctx); - if (ret) - return ret; - ret = read_stream_prefix(lte, lte->size, consume_chunk, - cb_chunk_size, cb_ctx, 0); - if (ret) - return ret; - - ret = (*end_stream)(lte, cb_ctx); - if (ret) + ret = read_full_stream_with_cbs(lte, sink_cbs); + if (ret && ret != BEGIN_STREAM_STATUS_SKIP_STREAM) return ret; } return 0; } -struct extract_ctx { - SHA_CTX sha_ctx; - consume_data_callback_t extract_chunk; - void *extract_chunk_arg; -}; - -static int -extract_chunk_sha1_wrapper(const void *chunk, size_t chunk_size, void *_ctx) -{ - struct extract_ctx *ctx = _ctx; - - 
sha1_update(&ctx->sha_ctx, chunk, chunk_size); - return ctx->extract_chunk(chunk, chunk_size, ctx->extract_chunk_arg); -} - -/* Extracts the first @size bytes of a stream to somewhere. In the process, the - * SHA1 message digest of the uncompressed stream is checked if the full stream - * is being extracted. +/* Extract the first @size bytes of the specified stream. * - * @extract_chunk is a function that will be called to extract each chunk of the - * stream. */ + * If @size specifies the full uncompressed size of the stream, then the SHA1 + * message digest of the uncompressed stream is checked while being extracted. + * + * The uncompressed data of the resource is passed in chunks of unspecified size + * to the @extract_chunk function, passing it @extract_chunk_arg. */ int -extract_stream(const struct wim_lookup_table_entry *lte, u64 size, +extract_stream(struct wim_lookup_table_entry *lte, u64 size, consume_data_callback_t extract_chunk, void *extract_chunk_arg) { - int ret; + wimlib_assert(size <= lte->size); if (size == lte->size) { - /* Do SHA1 */ - struct extract_ctx ctx; - ctx.extract_chunk = extract_chunk; - ctx.extract_chunk_arg = extract_chunk_arg; - sha1_init(&ctx.sha_ctx); - ret = read_stream_prefix(lte, size, - extract_chunk_sha1_wrapper, - lte_cchunk_size(lte), - &ctx, 0); - if (ret == 0) { - u8 hash[SHA1_HASH_SIZE]; - sha1_final(hash, &ctx.sha_ctx); - if (!hashes_equal(hash, lte->hash)) { - if (wimlib_print_errors) { - ERROR("Invalid SHA1 message digest " - "on the following WIM stream:"); - print_lookup_table_entry(lte, stderr); - if (lte->resource_location == RESOURCE_IN_WIM) - ERROR("The WIM file appears to be corrupt!"); - } - ret = WIMLIB_ERR_INVALID_RESOURCE_HASH; - } - } + /* Do SHA1. 
*/ + struct read_stream_list_callbacks cbs = { + .consume_chunk = extract_chunk, + .consume_chunk_ctx = extract_chunk_arg, + }; + return read_full_stream_with_sha1(lte, &cbs); } else { - /* Don't do SHA1 */ - ret = read_stream_prefix(lte, size, extract_chunk, - lte_cchunk_size(lte), - extract_chunk_arg, 0); + /* Don't do SHA1. */ + return read_stream_prefix(lte, size, extract_chunk, + extract_chunk_arg); } - return ret; } -static int -extract_wim_chunk_to_fd(const void *buf, size_t len, void *_fd_p) +/* A consume_data_callback_t implementation that writes the chunk of data to a + * file descriptor. */ +int +extract_chunk_to_fd(const void *chunk, size_t size, void *_fd_p) { struct filedes *fd = _fd_p; - int ret = full_write(fd, buf, len); - if (ret) + + int ret = full_write(fd, chunk, size); + if (ret) { ERROR_WITH_ERRNO("Error writing to file descriptor"); - return ret; + return ret; + } + return 0; } /* Extract the first @size bytes of the specified stream to the specified file - * descriptor. If @size is the full size of the stream, its SHA1 message digest - * is also checked. */ + * descriptor. */ int -extract_stream_to_fd(const struct wim_lookup_table_entry *lte, +extract_stream_to_fd(struct wim_lookup_table_entry *lte, struct filedes *fd, u64 size) { - return extract_stream(lte, size, extract_wim_chunk_to_fd, fd); + return extract_stream(lte, size, extract_chunk_to_fd, fd); } - -static int -sha1_chunk(const void *buf, size_t len, void *ctx) +/* Extract the full uncompressed contents of the specified stream to the + * specified file descriptor. */ +int +extract_full_stream_to_fd(struct wim_lookup_table_entry *lte, + struct filedes *fd) { - sha1_update(ctx, buf, len); - return 0; + return extract_stream_to_fd(lte, fd, lte->size); } -/* Calculate the SHA1 message digest of a stream, storing it in @lte->hash. */ +/* Calculate the SHA1 message digest of a stream and store it in @lte->hash. 
*/ int sha1_stream(struct wim_lookup_table_entry *lte) { - int ret; - SHA_CTX sha_ctx; - - sha1_init(&sha_ctx); - ret = read_stream_prefix(lte, lte->size, - sha1_chunk, lte_cchunk_size(lte), - &sha_ctx, 0); - if (ret == 0) - sha1_final(lte->hash, &sha_ctx); - - return ret; + wimlib_assert(lte->unhashed); + struct read_stream_list_callbacks cbs = { + }; + return read_full_stream_with_sha1(lte, &cbs); } -/* Convert a WIM resource header to a stand-alone resource specification. */ +/* Convert a short WIM resource header to a stand-alone WIM resource + * specification. + * + * Note: for packed resources some fields still need to be overridden. + */ void wim_res_hdr_to_spec(const struct wim_reshdr *reshdr, WIMStruct *wim, - struct wim_resource_spec *spec) + struct wim_resource_spec *rspec) { - spec->wim = wim; - spec->offset_in_wim = reshdr->offset_in_wim; - spec->size_in_wim = reshdr->size_in_wim; - spec->uncompressed_size = reshdr->uncompressed_size; - INIT_LIST_HEAD(&spec->lte_list); - spec->flags = reshdr->flags; - spec->is_pipable = wim_is_pipable(wim); - if (spec->flags & (WIM_RESHDR_FLAG_COMPRESSED | WIM_RESHDR_FLAG_CONCAT)) { - spec->ctype = wim->compression_type; - spec->cchunk_size = wim->chunk_size; + rspec->wim = wim; + rspec->offset_in_wim = reshdr->offset_in_wim; + rspec->size_in_wim = reshdr->size_in_wim; + rspec->uncompressed_size = reshdr->uncompressed_size; + INIT_LIST_HEAD(&rspec->stream_list); + rspec->flags = reshdr->flags; + rspec->is_pipable = wim_is_pipable(wim); + if (rspec->flags & WIM_RESHDR_FLAG_COMPRESSED) { + rspec->compression_type = wim->compression_type; + rspec->chunk_size = wim->chunk_size; } else { - spec->ctype = WIMLIB_COMPRESSION_TYPE_NONE; - spec->cchunk_size = 0; + rspec->compression_type = WIMLIB_COMPRESSION_TYPE_NONE; + rspec->chunk_size = 0; } } @@ -1375,29 +1521,20 @@ wim_res_spec_to_hdr(const struct wim_resource_spec *rspec, /* Translates a WIM resource header from the on-disk format into an in-memory * format. 
*/ -int +void get_wim_reshdr(const struct wim_reshdr_disk *disk_reshdr, struct wim_reshdr *reshdr) { reshdr->offset_in_wim = le64_to_cpu(disk_reshdr->offset_in_wim); reshdr->size_in_wim = (((u64)disk_reshdr->size_in_wim[0] << 0) | - ((u64)disk_reshdr->size_in_wim[1] << 8) | - ((u64)disk_reshdr->size_in_wim[2] << 16) | - ((u64)disk_reshdr->size_in_wim[3] << 24) | - ((u64)disk_reshdr->size_in_wim[4] << 32) | - ((u64)disk_reshdr->size_in_wim[5] << 40) | - ((u64)disk_reshdr->size_in_wim[6] << 48)); + ((u64)disk_reshdr->size_in_wim[1] << 8) | + ((u64)disk_reshdr->size_in_wim[2] << 16) | + ((u64)disk_reshdr->size_in_wim[3] << 24) | + ((u64)disk_reshdr->size_in_wim[4] << 32) | + ((u64)disk_reshdr->size_in_wim[5] << 40) | + ((u64)disk_reshdr->size_in_wim[6] << 48)); reshdr->uncompressed_size = le64_to_cpu(disk_reshdr->uncompressed_size); reshdr->flags = disk_reshdr->flags; - - /* Truncate numbers to 62 bits to avoid possible overflows. */ - if (reshdr->offset_in_wim & 0xc000000000000000ULL) - return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - - if (reshdr->uncompressed_size & 0xc000000000000000ULL) - return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - - return 0; } /* Translates a WIM resource header from an in-memory format into the on-disk