X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fresource.c;h=134821971a6b3b20e4b6d79f520be219ab6c1dbc;hp=b8b8b92185c9e82a65146857226655b178b94f15;hb=944944f4627d18da3b4124ec34df15706baf0d9b;hpb=c86ed31aa6d3fb28cfa017fb8f6e4888a9ae26b2 diff --git a/src/resource.c b/src/resource.c index b8b8b921..13482197 100644 --- a/src/resource.c +++ b/src/resource.c @@ -1,56 +1,46 @@ /* * resource.c * - * Code for reading streams and resources, including compressed WIM resources. + * Code for reading blobs and resources, including compressed WIM resources. */ /* - * Copyright (C) 2012, 2013 Eric Biggers + * Copyright (C) 2012, 2013, 2015 Eric Biggers * - * This file is part of wimlib, a library for working with WIM files. + * This file is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) any + * later version. * - * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free Software - * Foundation; either version 3 of the License, or (at your option) any later - * version. + * This file is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. * - * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with - * wimlib; if not, see http://www.gnu.org/licenses/. + * You should have received a copy of the GNU Lesser General Public License + * along with this file; if not, see http://www.gnu.org/licenses/. */ #ifdef HAVE_CONFIG_H # include "config.h" #endif -#include "wimlib.h" +#include +#include +#include + +#include "wimlib/alloca.h" +#include "wimlib/assert.h" +#include "wimlib/bitops.h" +#include "wimlib/blob_table.h" #include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/file_io.h" -#include "wimlib/lookup_table.h" +#include "wimlib/ntfs_3g.h" #include "wimlib/resource.h" #include "wimlib/sha1.h" - -#ifdef __WIN32__ -/* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */ -# include "wimlib/win32.h" -#endif - -#ifdef WITH_NTFS_3G -/* for read_ntfs_file_prefix() */ -# include "wimlib/ntfs_3g.h" -#endif - -#ifdef HAVE_ALLOCA_H -# include -#endif -#include -#include -#include -#include +#include "wimlib/wim.h" +#include "wimlib/win32.h" /* * Compressed WIM resources @@ -72,7 +62,7 @@ * little-endian integers. * * - The chunk table is included in the compressed size of the resource provided - * in the corresponding entry in the WIM's stream lookup table. + * in the corresponding entry in the WIM's blob table. * * - The compressed size of a chunk is never greater than the uncompressed size. * From the compressor's point of view, chunks that would have compressed to a @@ -90,62 +80,18 @@ */ -/* Decompress the specified chunk that uses the specified compression type - * @ctype, part of a WIM with default chunk size @wim_chunk_size. For LZX the - * separate @wim_chunk_size is needed because it determines the window size used - * for LZX compression. */ -static int -decompress(const void *cchunk, unsigned clen, void *uchunk, unsigned ulen, - int ctype, u32 wim_chunk_size) -{ - switch (ctype) { - case WIMLIB_COMPRESSION_TYPE_LZX: - return wimlib_lzx_decompress2(cchunk, clen, - uchunk, ulen, wim_chunk_size); - case WIMLIB_COMPRESSION_TYPE_XPRESS: - return wimlib_xpress_decompress(cchunk, clen, - uchunk, ulen); - case WIMLIB_COMPRESSION_TYPE_LZMS: - return wimlib_lzms_decompress(cchunk, clen, uchunk, ulen); - default: - ERROR("Invalid compression format (%d)", ctype); - return -1; - } -} - struct data_range { u64 offset; u64 size; }; -/* Alternate chunk table format for resources with - * WIM_RESHDR_FLAG_PACKED_STREAMS set. */ -struct alt_chunk_table_header_disk { - /* Uncompressed size of the resource in bytes. */ - le64 res_usize; - - /* Number of bytes each compressed chunk decompresses into, except - * possibly the last which decompresses into the remainder. */ - le32 chunk_size; - - /* Compression format used for compressed chunks: - * 0 = None - * 1 = LZX - * 2 = XPRESS - * 3 = LZMS */ - le32 compression_format; - - /* This header is directly followed by a table of compressed sizes of - * the chunks. */ -} _packed_attribute; - /* * read_compressed_wim_resource() - * * Read data from a compressed WIM resource. * - * @rspec - * Specification of the compressed WIM resource to read from. + * @rdesc + * Description of the compressed WIM resource to read from. * @ranges * Nonoverlapping, nonempty ranges of the uncompressed resource data to * read, sorted by increasing offset. @@ -153,16 +99,11 @@ struct alt_chunk_table_header_disk { * Number of ranges in @ranges; must be at least 1. * @cb * Callback function to feed the data being read. Each call provides the - * next chunk of the requested data. Each chunk will be of nonzero size - * and will not cross range boundaries, but otherwise is of unspecified - * size. + * next chunk of the requested data, uncompressed. Each chunk will be of + * nonzero size and will not cross range boundaries, but otherwise will be + * of unspecified size. * @cb_ctx * Parameter to pass to @cb_ctx. - * @raw_chunks_mode - * If %true, this function will provide the raw compressed chunks of the - * resource rather than the uncompressed data. In this mode, only a single - * data range can be requested, and it must cover the entire uncompressed - * resource. * * Possible return values: * @@ -175,12 +116,11 @@ struct alt_chunk_table_header_disk { * or other error code returned by the @cb function. */ static int -read_compressed_wim_resource(const struct wim_resource_spec * const rspec, +read_compressed_wim_resource(const struct wim_resource_descriptor * const rdesc, const struct data_range * const ranges, const size_t num_ranges, const consume_data_callback_t cb, - void * const cb_ctx, - const bool raw_chunks_mode) + void * const cb_ctx) { int ret; int errno_save; @@ -191,84 +131,82 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, bool chunk_offsets_malloced = false; bool ubuf_malloced = false; bool cbuf_malloced = false; + struct wimlib_decompressor *decompressor = NULL; /* Sanity checks */ - wimlib_assert(rspec != NULL); - wimlib_assert(resource_is_compressed(rspec)); + wimlib_assert(rdesc != NULL); + wimlib_assert(resource_is_compressed(rdesc)); wimlib_assert(cb != NULL); wimlib_assert(num_ranges != 0); for (size_t i = 0; i < num_ranges; i++) { DEBUG("Range %zu/%zu: %"PRIu64"@+%"PRIu64" / %"PRIu64, i + 1, num_ranges, ranges[i].size, ranges[i].offset, - rspec->uncompressed_size); + rdesc->uncompressed_size); wimlib_assert(ranges[i].size != 0); wimlib_assert(ranges[i].offset + ranges[i].size >= ranges[i].size); - wimlib_assert(ranges[i].offset + ranges[i].size <= rspec->uncompressed_size); + wimlib_assert(ranges[i].offset + ranges[i].size <= rdesc->uncompressed_size); } for (size_t i = 0; i < num_ranges - 1; i++) wimlib_assert(ranges[i].offset + ranges[i].size <= ranges[i + 1].offset); - if (raw_chunks_mode) { - wimlib_assert(num_ranges == 1); - wimlib_assert(ranges[0].offset == 0); - wimlib_assert(ranges[0].size == rspec->uncompressed_size); - } - /* Get the offsets of the first and last bytes of the read. */ const u64 first_offset = ranges[0].offset; const u64 last_offset = ranges[num_ranges - 1].offset + ranges[num_ranges - 1].size - 1; /* Get the file descriptor for the WIM. */ - struct filedes * const in_fd = &rspec->wim->in_fd; + struct filedes * const in_fd = &rdesc->wim->in_fd; /* Determine if we're reading a pipable resource from a pipe or not. */ - const bool is_pipe_read = !filedes_is_seekable(in_fd); + const bool is_pipe_read = (rdesc->is_pipable && !filedes_is_seekable(in_fd)); - /* Determine if the chunk table is in an altenate format. */ - const bool alt_chunk_table = (rspec->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) + /* Determine if the chunk table is in an alternate format. */ + const bool alt_chunk_table = (rdesc->flags & WIM_RESHDR_FLAG_SOLID) && !is_pipe_read; /* Get the maximum size of uncompressed chunks in this resource, which * we require be a power of 2. */ - u32 chunk_size; - u64 cur_read_offset = rspec->offset_in_wim; - int ctype; + u64 cur_read_offset = rdesc->offset_in_wim; + int ctype = rdesc->compression_type; + u32 chunk_size = rdesc->chunk_size; if (alt_chunk_table) { /* Alternate chunk table format. Its header specifies the chunk - * size and compression format. */ - struct alt_chunk_table_header_disk hdr; - - ret = full_pread(in_fd, &hdr, sizeof(hdr), cur_read_offset); - if (ret) - goto read_error; - cur_read_offset += sizeof(hdr); - - chunk_size = le32_to_cpu(hdr.chunk_size); - ctype = le32_to_cpu(hdr.compression_format); - - /* Format numbers must be the same as in WIMGAPI to be - * compatible. */ - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 1); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 2); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3); - } else { - /* "Normal" format: the maximum uncompressed chunk size and the - * compression format default to those of the WIM itself. */ - chunk_size = rspec->wim->chunk_size; - ctype = rspec->wim->compression_type; + * size and compression format. Note: it could be read here; + * however, the relevant data was already loaded into @rdesc by + * read_blob_table(). */ + cur_read_offset += sizeof(struct alt_chunk_table_header_disk); } + if (!is_power_of_2(chunk_size)) { ERROR("Invalid compressed resource: " - "expected power-of-2 chunk size (got %u)", chunk_size); + "expected power-of-2 chunk size (got %"PRIu32")", + chunk_size); ret = WIMLIB_ERR_INVALID_CHUNK_SIZE; + errno = EINVAL; goto out_free_memory; } - const u32 chunk_order = bsr32(chunk_size); + /* Get valid decompressor. */ + if (ctype == rdesc->wim->decompressor_ctype && + chunk_size == rdesc->wim->decompressor_max_block_size) + { + /* Cached decompressor. */ + decompressor = rdesc->wim->decompressor; + rdesc->wim->decompressor_ctype = WIMLIB_COMPRESSION_TYPE_NONE; + rdesc->wim->decompressor = NULL; + } else { + ret = wimlib_create_decompressor(ctype, chunk_size, + &decompressor); + if (ret) { + if (ret != WIMLIB_ERR_NOMEM) + errno = EINVAL; + goto out_free_memory; + } + } + + const u32 chunk_order = fls32(chunk_size); /* Calculate the total number of chunks the resource is divided into. */ - const u64 num_chunks = (rspec->uncompressed_size + chunk_size - 1) >> chunk_order; + const u64 num_chunks = (rdesc->uncompressed_size + chunk_size - 1) >> chunk_order; /* Calculate the 0-based indices of the first and last chunks containing * data that needs to be passed to the callback. */ @@ -293,11 +231,9 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, const u64 num_chunk_entries = (alt_chunk_table ? num_chunks : num_chunks - 1); /* Set the size of each chunk table entry based on the resource's - * uncompressed size. XXX: Does the alternate chunk table really - * always have 4-byte entries? */ - const u64 chunk_entry_size = - (rspec->uncompressed_size > (1ULL << 32) && !alt_chunk_table) - ? 8 : 4; + * uncompressed size. */ + const u64 chunk_entry_size = get_chunk_entry_size(rdesc->uncompressed_size, + alt_chunk_table); /* Calculate the size of the chunk table in bytes. */ const u64 chunk_table_size = num_chunk_entries * chunk_entry_size; @@ -364,14 +300,14 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, const u64 file_offset_of_needed_chunk_entries = cur_read_offset + (first_chunk_entry_to_read * chunk_entry_size) - + (rspec->is_pipable ? (rspec->size_in_wim - chunk_table_size) : 0); + + (rdesc->is_pipable ? (rdesc->size_in_wim - chunk_table_size) : 0); void * const chunk_table_data = (u8*)chunk_offsets + chunk_offsets_alloc_size - chunk_table_size_to_read; - ret = full_pread(in_fd, chunk_table_data, chunk_table_size, + ret = full_pread(in_fd, chunk_table_data, chunk_table_size_to_read, file_offset_of_needed_chunk_entries); if (ret) goto read_error; @@ -379,8 +315,8 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, /* Now fill in chunk_offsets from the entries we have read in * chunk_tab_data. We break aliasing rules here to avoid having * to allocate yet another array. */ - typedef le64 __attribute__((may_alias)) aliased_le64_t; - typedef le32 __attribute__((may_alias)) aliased_le32_t; + typedef le64 _may_alias_attribute aliased_le64_t; + typedef le32 _may_alias_attribute aliased_le32_t; u64 * chunk_offsets_p = chunk_offsets; if (alt_chunk_table) { @@ -412,7 +348,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, /* Set offset to beginning of first chunk to read. */ cur_read_offset += chunk_offsets[0]; - if (rspec->is_pipable) + if (rdesc->is_pipable) cur_read_offset += read_start_chunk * sizeof(struct pwm_chunk_hdr); else cur_read_offset += chunk_table_size; @@ -428,19 +364,17 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, ubuf_malloced = true; } - /* Unless the raw compressed data was requested, allocate a temporary - * buffer for reading compressed chunks, each of which can be at most - * @chunk_size - 1 bytes. This excludes compressed chunks that are a - * full @chunk_size bytes, which are actually stored uncompressed. */ - if (!raw_chunks_mode) { - if (chunk_size - 1 <= STACK_MAX) { - cbuf = alloca(chunk_size - 1); - } else { - cbuf = MALLOC(chunk_size - 1); - if (cbuf == NULL) - goto oom; - cbuf_malloced = true; - } + /* Allocate a temporary buffer for reading compressed chunks, each of + * which can be at most @chunk_size - 1 bytes. This excludes compressed + * chunks that are a full @chunk_size bytes, which are actually stored + * uncompressed. */ + if (chunk_size - 1 <= STACK_MAX) { + cbuf = alloca(chunk_size - 1); + } else { + cbuf = MALLOC(chunk_size - 1); + if (cbuf == NULL) + goto oom; + cbuf_malloced = true; } /* Set current data range. */ @@ -454,8 +388,8 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, /* Calculate uncompressed size of next chunk. */ u32 chunk_usize; - if ((i == num_chunks - 1) && (rspec->uncompressed_size & (chunk_size - 1))) - chunk_usize = (rspec->uncompressed_size & (chunk_size - 1)); + if ((i == num_chunks - 1) && (rdesc->uncompressed_size & (chunk_size - 1))) + chunk_usize = (rdesc->uncompressed_size & (chunk_size - 1)); else chunk_usize = chunk_size; @@ -471,10 +405,10 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, chunk_csize = le32_to_cpu(chunk_hdr.compressed_size); } else { if (i == num_chunks - 1) { - chunk_csize = rspec->size_in_wim - + chunk_csize = rdesc->size_in_wim - chunk_table_full_size - chunk_offsets[i - read_start_chunk]; - if (rspec->is_pipable) + if (rdesc->is_pipable) chunk_csize -= num_chunks * sizeof(struct pwm_chunk_hdr); } else { chunk_csize = chunk_offsets[i + 1 - read_start_chunk] - @@ -487,7 +421,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, ret = WIMLIB_ERR_DECOMPRESSION; goto out_free_memory; } - if (rspec->is_pipable) + if (rdesc->is_pipable) cur_read_offset += sizeof(struct pwm_chunk_hdr); /* Offsets in the uncompressed resource at which this chunk @@ -513,7 +447,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, * function. */ u8 *read_buf; - if (chunk_csize == chunk_usize || raw_chunks_mode) + if (chunk_csize == chunk_usize) read_buf = ubuf; else read_buf = cbuf; @@ -527,14 +461,13 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, if (read_buf == cbuf) { DEBUG("Decompressing chunk %"PRIu64" " - "(csize=%"PRIu64" usize=%"PRIu64"", + "(csize=%"PRIu32" usize=%"PRIu32")", i, chunk_csize, chunk_usize); - ret = decompress(cbuf, - chunk_csize, - ubuf, - chunk_usize, - ctype, - chunk_size); + ret = wimlib_decompress(cbuf, + chunk_csize, + ubuf, + chunk_usize, + decompressor); if (ret) { ERROR("Failed to decompress data!"); ret = WIMLIB_ERR_DECOMPRESSION; @@ -557,10 +490,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, end = min(cur_range_end, chunk_end_offset) - chunk_start_offset; size = end - start; - if (raw_chunks_mode) - ret = (*cb)(&ubuf[0], chunk_csize, cb_ctx); - else - ret = (*cb)(&ubuf[start], size, cb_ctx); + ret = (*cb)(&ubuf[start], size, cb_ctx); if (ret) goto out_free_memory; @@ -580,7 +510,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, } if (is_pipe_read && - last_offset == rspec->uncompressed_size - 1 && + last_offset == rdesc->uncompressed_size - 1 && chunk_table_size) { u8 dummy; @@ -594,8 +524,15 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, goto read_error; } ret = 0; + out_free_memory: errno_save = errno; + if (decompressor) { + wimlib_free_decompressor(rdesc->wim->decompressor); + rdesc->wim->decompressor = decompressor; + rdesc->wim->decompressor_ctype = ctype; + rdesc->wim->decompressor_max_block_size = chunk_size; + } if (chunk_offsets_malloced) FREE(chunk_offsets); if (ubuf_malloced) @@ -617,11 +554,33 @@ read_error: goto out_free_memory; } +static int +fill_zeroes(u64 size, consume_data_callback_t cb, void *cb_ctx) +{ + if (unlikely(size)) { + u8 buf[min(size, BUFFER_SIZE)]; + + memset(buf, 0, sizeof(buf)); + + do { + size_t len; + int ret; + + len = min(size, BUFFER_SIZE); + ret = cb(buf, len, cb_ctx); + if (ret) + return ret; + size -= len; + } while (size); + } + return 0; +} + /* Read raw data from a file descriptor at the specified offset, feeding the * data it in chunks into the specified callback function. */ static int -read_raw_file_data(struct filedes *in_fd, u64 size, - consume_data_callback_t cb, void *cb_ctx, u64 offset) +read_raw_file_data(struct filedes *in_fd, u64 offset, u64 size, + consume_data_callback_t cb, void *cb_ctx) { u8 buf[BUFFER_SIZE]; size_t bytes_to_read; @@ -658,24 +617,21 @@ bufferer_cb(const void *chunk, size_t size, void *_ctx) * read_partial_wim_resource()- * * Read a range of data from an uncompressed or compressed resource in a WIM - * file. Data is fed chunk-by-chunk into the callback function @cb, passing it - * the argument @cb_ctx. The chunks are of unspecified size unless the - * RAW_CHUNKS mode is requested. - * - * By default, this function provides the uncompressed data of the resource, and - * @offset and @size and interpreted relative to the uncompressed contents of - * the resource. This behavior can be modified by either of the following - * flags: + * file. * - * WIMLIB_READ_RESOURCE_FLAG_RAW_FULL: - * Read @size bytes at @offset of the raw contents of the compressed - * resource. In the case of pipable resources, this excludes the stream - * header. Exclusive with WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS. - * - * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS: - * Read the raw compressed chunks of the compressed resource. @size must - * be the full uncompressed size, @offset must be 0, and @cb_chunk_size - * must be the resource chunk size. + * @rdesc + * Description of the WIM resource to read from. + * @offset + * Offset within the uncompressed resource at which to start reading. + * @size + * Number of bytes to read. + * @cb + * Callback function to feed the data being read. Each call provides the + * next chunk of the requested data, uncompressed. Each chunk will be of + * nonzero size and will not cross range boundaries, but otherwise will be + * of unspecified size. + * @cb_ctx + * Parameter to pass to @cb_ctx. * * Return values: * WIMLIB_ERR_SUCCESS (0) @@ -687,67 +643,80 @@ bufferer_cb(const void *chunk, size_t size, void *_ctx) * or other error code returned by the @cb function. */ static int -read_partial_wim_resource(const struct wim_resource_spec *rspec, - u64 offset, u64 size, consume_data_callback_t cb, - void *cb_ctx, int flags) +read_partial_wim_resource(const struct wim_resource_descriptor *rdesc, + u64 offset, u64 size, + consume_data_callback_t cb, void *cb_ctx) { /* Sanity checks. */ - if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) { - wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS)); - wimlib_assert(offset + size >= offset); - wimlib_assert(offset + size <= rspec->size_in_wim); - } else if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) { - wimlib_assert(offset == 0); - wimlib_assert(size == rspec->uncompressed_size); - } else { - wimlib_assert(offset + size >= offset); - wimlib_assert(offset + size <= rspec->uncompressed_size); - } + wimlib_assert(offset + size >= offset); + wimlib_assert(offset + size <= rdesc->uncompressed_size); DEBUG("Reading %"PRIu64" @ %"PRIu64" from WIM resource " - "%"PRIu64" => %"PRIu64" @ %"PRIu64" (flags 0x%08x)", - size, offset, rspec->uncompressed_size, - rspec->size_in_wim, rspec->offset_in_wim, flags); + "%"PRIu64" => %"PRIu64" @ %"PRIu64, + size, offset, rdesc->uncompressed_size, + rdesc->size_in_wim, rdesc->offset_in_wim); /* Trivial case. */ if (size == 0) return 0; - if ((flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) || - !resource_is_compressed(rspec)) - { - return read_raw_file_data(&rspec->wim->in_fd, - size, - cb, - cb_ctx, - rspec->offset_in_wim + offset); - } else { - bool raw_chunks = (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS); + if (resource_is_compressed(rdesc)) { struct data_range range = { .offset = offset, .size = size, }; - return read_compressed_wim_resource(rspec, &range, 1, - cb, cb_ctx, raw_chunks); + return read_compressed_wim_resource(rdesc, &range, 1, + cb, cb_ctx); + } else { + /* Reading uncompressed resource. For completeness, handle the + * weird case where size_in_wim < uncompressed_size. */ + + u64 read_size; + u64 zeroes_size; + int ret; + + if (likely(offset + size <= rdesc->size_in_wim) || + rdesc->is_pipable) + { + read_size = size; + zeroes_size = 0; + } else { + if (offset >= rdesc->size_in_wim) { + read_size = 0; + zeroes_size = size; + } else { + read_size = rdesc->size_in_wim - offset; + zeroes_size = offset + size - rdesc->size_in_wim; + } + } + + ret = read_raw_file_data(&rdesc->wim->in_fd, + rdesc->offset_in_wim + offset, + read_size, + cb, + cb_ctx); + if (ret) + return ret; + + return fill_zeroes(zeroes_size, cb, cb_ctx); } } -/* Read the specified range of uncompressed data from the specified stream, - * which must be located into a WIM file, into the specified buffer. */ +/* Read the specified range of uncompressed data from the specified blob, which + * must be located into a WIM file, into the specified buffer. */ int -read_partial_wim_stream_into_buf(const struct wim_lookup_table_entry *lte, - size_t size, u64 offset, void *_buf) +read_partial_wim_blob_into_buf(const struct blob_descriptor *blob, + size_t size, u64 offset, void *_buf) { u8 *buf = _buf; - wimlib_assert(lte->resource_location == RESOURCE_IN_WIM); + wimlib_assert(blob->blob_location == BLOB_IN_WIM); - return read_partial_wim_resource(lte->rspec, - lte->offset_in_res + offset, + return read_partial_wim_resource(blob->rdesc, + blob->offset_in_res + offset, size, bufferer_cb, - &buf, - 0); + &buf); } /* A consume_data_callback_t implementation that simply ignores the data @@ -758,157 +727,159 @@ skip_chunk_cb(const void *chunk, size_t size, void *_ctx) return 0; } -/* Skip over the data of the specified stream, which must correspond to a full - * WIM resource. */ +/* Skip over the data of the specified WIM resource. */ int -skip_wim_stream(struct wim_lookup_table_entry *lte) +skip_wim_resource(struct wim_resource_descriptor *rdesc) { - wimlib_assert(lte->resource_location == RESOURCE_IN_WIM); - wimlib_assert(!lte_is_partial(lte)); - return read_partial_wim_resource(lte->rspec, - 0, - lte->rspec->uncompressed_size, - skip_chunk_cb, - NULL, - WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS); + DEBUG("Skipping resource (size=%"PRIu64")", rdesc->uncompressed_size); + return read_partial_wim_resource(rdesc, 0, rdesc->uncompressed_size, + skip_chunk_cb, NULL); } static int -read_wim_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, void *cb_ctx, int flags) +read_wim_blob_prefix(const struct blob_descriptor *blob, u64 size, + consume_data_callback_t cb, void *cb_ctx) { - return read_partial_wim_resource(lte->rspec, - lte->offset_in_res, - size, - cb, - cb_ctx, - flags); + return read_partial_wim_resource(blob->rdesc, blob->offset_in_res, size, + cb, cb_ctx); } -#ifndef __WIN32__ -/* This function handles reading stream data that is located in an external - * file, such as a file that has been added to the WIM image through execution - * of a wimlib_add_command. +/* This function handles reading blob data that is located in an external file, + * such as a file that has been added to the WIM image through execution of a + * wimlib_add_command. * * This assumes the file can be accessed using the standard POSIX open(), * read(), and close(). On Windows this will not necessarily be the case (since * the file may need FILE_FLAG_BACKUP_SEMANTICS to be opened, or the file may be * encrypted), so Windows uses its own code for its equivalent case. */ static int -read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, void *cb_ctx, - int _ignored_flags) +read_file_on_disk_prefix(const struct blob_descriptor *blob, u64 size, + consume_data_callback_t cb, void *cb_ctx) { int ret; int raw_fd; struct filedes fd; - wimlib_assert(size <= lte->size); + DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"", size, blob->file_on_disk); + + raw_fd = topen(blob->file_on_disk, O_BINARY | O_RDONLY); + if (raw_fd < 0) { + ERROR_WITH_ERRNO("Can't open \"%"TS"\"", blob->file_on_disk); + return WIMLIB_ERR_OPEN; + } + filedes_init(&fd, raw_fd); + ret = read_raw_file_data(&fd, 0, size, cb, cb_ctx); + filedes_close(&fd); + return ret; +} + +#ifdef WITH_FUSE +static int +read_staging_file_prefix(const struct blob_descriptor *blob, u64 size, + consume_data_callback_t cb, void *cb_ctx) +{ + int raw_fd; + struct filedes fd; + int ret; - DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"", size, lte->file_on_disk); + DEBUG("Reading %"PRIu64" bytes from staging file \"%s\"", + size, blob->staging_file_name); - raw_fd = open(lte->file_on_disk, O_BINARY | O_RDONLY); + raw_fd = openat(blob->staging_dir_fd, blob->staging_file_name, + O_RDONLY | O_NOFOLLOW); if (raw_fd < 0) { - ERROR_WITH_ERRNO("Can't open \"%"TS"\"", lte->file_on_disk); + ERROR_WITH_ERRNO("Can't open staging file \"%s\"", + blob->staging_file_name); return WIMLIB_ERR_OPEN; } filedes_init(&fd, raw_fd); - ret = read_raw_file_data(&fd, size, cb, cb_ctx, 0); + ret = read_raw_file_data(&fd, 0, size, cb, cb_ctx); filedes_close(&fd); return ret; } -#endif /* !__WIN32__ */ +#endif -/* This function handles the trivial case of reading stream data that is, in - * fact, already located in an in-memory buffer. */ +/* This function handles the trivial case of reading blob data that is, in fact, + * already located in an in-memory buffer. */ static int -read_buffer_prefix(const struct wim_lookup_table_entry *lte, - u64 size, consume_data_callback_t cb, - void *cb_ctx, int _ignored_flags) +read_buffer_prefix(const struct blob_descriptor *blob, + u64 size, consume_data_callback_t cb, void *cb_ctx) { - wimlib_assert(size <= lte->size); - return (*cb)(lte->attached_buffer, size, cb_ctx); + return (*cb)(blob->attached_buffer, size, cb_ctx); } -typedef int (*read_stream_prefix_handler_t)(const struct wim_lookup_table_entry *lte, - u64 size, - consume_data_callback_t cb, - void *cb_ctx, int flags); +typedef int (*read_blob_prefix_handler_t)(const struct blob_descriptor *blob, + u64 size, + consume_data_callback_t cb, + void *cb_ctx); /* - * read_stream_prefix()- + * read_blob_prefix()- * - * Reads the first @size bytes from a generic "stream", which may be located in + * Reads the first @size bytes from a generic "blob", which may be located in * any one of several locations, such as in a WIM file (compressed or * uncompressed), in an external file, or directly in an in-memory buffer. * * This function feeds the data to a callback function @cb in chunks of * unspecified size. * - * If the stream is located in a WIM file, @flags can be set as documented in - * read_partial_wim_resource(). Otherwise @flags are ignored. - * * Returns 0 on success; nonzero on error. A nonzero value will be returned if - * the stream data cannot be successfully read (for a number of different - * reasons, depending on the stream location), or if @cb returned nonzero in - * which case that error code will be returned. + * the blob data cannot be successfully read (for a number of different reasons, + * depending on the blob location), or if @cb returned nonzero in which case + * that error code will be returned. */ -int -read_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, void *cb_ctx, int flags) +static int +read_blob_prefix(const struct blob_descriptor *blob, u64 size, + consume_data_callback_t cb, void *cb_ctx) { - static const read_stream_prefix_handler_t handlers[] = { - [RESOURCE_IN_WIM] = read_wim_stream_prefix, - #ifdef __WIN32__ - [RESOURCE_IN_FILE_ON_DISK] = read_win32_file_prefix, - #else - [RESOURCE_IN_FILE_ON_DISK] = read_file_on_disk_prefix, - #endif - [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix, + static const read_blob_prefix_handler_t handlers[] = { + [BLOB_IN_WIM] = read_wim_blob_prefix, + [BLOB_IN_FILE_ON_DISK] = read_file_on_disk_prefix, + [BLOB_IN_ATTACHED_BUFFER] = read_buffer_prefix, #ifdef WITH_FUSE - [RESOURCE_IN_STAGING_FILE] = read_file_on_disk_prefix, + [BLOB_IN_STAGING_FILE] = read_staging_file_prefix, #endif #ifdef WITH_NTFS_3G - [RESOURCE_IN_NTFS_VOLUME] = read_ntfs_file_prefix, + [BLOB_IN_NTFS_VOLUME] = read_ntfs_attribute_prefix, #endif #ifdef __WIN32__ - [RESOURCE_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix, + [BLOB_IN_WINNT_FILE_ON_DISK] = read_winnt_stream_prefix, + [BLOB_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix, #endif }; - wimlib_assert(lte->resource_location < ARRAY_LEN(handlers) - && handlers[lte->resource_location] != NULL); - return handlers[lte->resource_location](lte, size, cb, cb_ctx, flags); + wimlib_assert(blob->blob_location < ARRAY_LEN(handlers) + && handlers[blob->blob_location] != NULL); + wimlib_assert(size <= blob->size); + return handlers[blob->blob_location](blob, size, cb, cb_ctx); } -/* Read the full uncompressed data of the specified stream into the specified - * buffer, which must have space for at least lte->size bytes. */ +/* Read the full uncompressed data of the specified blob into the specified + * buffer, which must have space for at least blob->size bytes. */ int -read_full_stream_into_buf(const struct wim_lookup_table_entry *lte, void *_buf) +read_full_blob_into_buf(const struct blob_descriptor *blob, void *_buf) { u8 *buf = _buf; - return read_stream_prefix(lte, lte->size, bufferer_cb, &buf, 0); + return read_blob_prefix(blob, blob->size, bufferer_cb, &buf); } -/* Retrieve the full uncompressed data of the specified stream. A buffer large +/* Retrieve the full uncompressed data of the specified blob. A buffer large * enough hold the data is allocated and returned in @buf_ret. */ int -read_full_stream_into_alloc_buf(const struct wim_lookup_table_entry *lte, - void **buf_ret) +read_full_blob_into_alloc_buf(const struct blob_descriptor *blob, void **buf_ret) { int ret; void *buf; - if ((size_t)lte->size != lte->size) { - ERROR("Can't read %"PRIu64" byte stream into " - "memory", lte->size); + if ((size_t)blob->size != blob->size) { + ERROR("Can't read %"PRIu64" byte blob into memory", blob->size); return WIMLIB_ERR_NOMEM; } - buf = MALLOC(lte->size); + buf = MALLOC(blob->size); if (buf == NULL) return WIMLIB_ERR_NOMEM; - ret = read_full_stream_into_buf(lte, buf); + ret = read_full_blob_into_buf(blob, buf); if (ret) { FREE(buf); return ret; @@ -918,75 +889,80 @@ read_full_stream_into_alloc_buf(const struct wim_lookup_table_entry *lte, return 0; } -/* Retrieve the full uncompressed data of the specified WIM resource. A buffer - * large enough hold the data is allocated and returned in @buf_ret. */ -static int -wim_resource_spec_to_data(struct wim_resource_spec *rspec, void **buf_ret) +/* Retrieve the full uncompressed data of a WIM resource specified as a raw + * `wim_reshdr' and the corresponding WIM file. A buffer large enough hold the + * data is allocated and returned in @buf_ret. */ +int +wim_reshdr_to_data(const struct wim_reshdr *reshdr, WIMStruct *wim, void **buf_ret) { - int ret; - struct wim_lookup_table_entry *lte; + struct wim_resource_descriptor rdesc; + struct blob_descriptor blob; - lte = new_lookup_table_entry(); - if (lte == NULL) - return WIMLIB_ERR_NOMEM; - - lte->unhashed = 1; - lte_bind_wim_resource_spec(lte, rspec); - lte->flags = rspec->flags; - lte->size = rspec->uncompressed_size; - lte->offset_in_res = 0; + wim_res_hdr_to_desc(reshdr, wim, &rdesc); + blob_set_is_located_in_nonsolid_wim_resource(&blob, &rdesc); - ret = read_full_stream_into_alloc_buf(lte, buf_ret); - - lte_unbind_wim_resource_spec(lte); - free_lookup_table_entry(lte); - return ret; + return read_full_blob_into_alloc_buf(&blob, buf_ret); } -/* Retrieve the full uncompressed data of a WIM resource specified as a raw - * `wim_reshdr' and the corresponding WIM file. A large enough hold the data is - * allocated and returned in @buf_ret. */ int -wim_reshdr_to_data(const struct wim_reshdr *reshdr, WIMStruct *wim, void **buf_ret) +wim_reshdr_to_hash(const struct wim_reshdr *reshdr, WIMStruct *wim, + u8 hash[SHA1_HASH_SIZE]) { - DEBUG("offset_in_wim=%"PRIu64", size_in_wim=%"PRIu64", " - "uncompressed_size=%"PRIu64, - reshdr->offset_in_wim, reshdr->size_in_wim, - reshdr->uncompressed_size); - - struct wim_resource_spec rspec; - wim_res_hdr_to_spec(reshdr, wim, &rspec); - return wim_resource_spec_to_data(&rspec, buf_ret); + struct wim_resource_descriptor rdesc; + struct blob_descriptor blob; + int ret; + + wim_res_hdr_to_desc(reshdr, wim, &rdesc); + blob_set_is_located_in_nonsolid_wim_resource(&blob, &rdesc); + blob.unhashed = 1; + + ret = sha1_blob(&blob); + if (ret) + return ret; + copy_hash(hash, blob.hash); + return 0; } -struct streamifier_context { - struct read_stream_list_callbacks cbs; - struct wim_lookup_table_entry *cur_stream; - u64 cur_stream_offset; - struct wim_lookup_table_entry *final_stream; +struct blobifier_context { + struct read_blob_list_callbacks cbs; + struct blob_descriptor *cur_blob; + struct blob_descriptor *next_blob; + u64 cur_blob_offset; + struct blob_descriptor *final_blob; size_t list_head_offset; }; +static struct blob_descriptor * +next_blob(struct blob_descriptor *blob, size_t list_head_offset) +{ + struct list_head *cur; + + cur = (struct list_head*)((u8*)blob + list_head_offset); + + return (struct blob_descriptor*)((u8*)cur->next - list_head_offset); +} + /* A consume_data_callback_t implementation that translates raw resource data - * into streams, calling the begin_stream, consume_chunk, and end_stream - * callback functions as appropriate. */ + * into blobs, calling the begin_blob, consume_chunk, and end_blob callback + * functions as appropriate. */ static int -streamifier_cb(const void *chunk, size_t size, void *_ctx) +blobifier_cb(const void *chunk, size_t size, void *_ctx) { - struct streamifier_context *ctx = _ctx; + struct blobifier_context *ctx = _ctx; int ret; - DEBUG("%zu bytes passed to streamifier", size); + DEBUG("%zu bytes passed to blobifier", size); + + wimlib_assert(ctx->cur_blob != NULL); + wimlib_assert(size <= ctx->cur_blob->size - ctx->cur_blob_offset); - wimlib_assert(ctx->cur_stream != NULL); - wimlib_assert(size <= ctx->cur_stream->size - ctx->cur_stream_offset); + if (ctx->cur_blob_offset == 0) { - if (ctx->cur_stream_offset == 0) { - /* Starting a new stream. */ - DEBUG("Begin new stream (size=%"PRIu64").", - ctx->cur_stream->size); - ret = (*ctx->cbs.begin_stream)(ctx->cur_stream, true, - ctx->cbs.begin_stream_ctx); + /* Starting a new blob. */ + DEBUG("Begin new blob (size=%"PRIu64").", ctx->cur_blob->size); + + ret = (*ctx->cbs.begin_blob)(ctx->cur_blob, + ctx->cbs.begin_blob_ctx); if (ret) return ret; } @@ -994,33 +970,29 @@ streamifier_cb(const void *chunk, size_t size, void *_ctx) /* Consume the chunk. */ ret = (*ctx->cbs.consume_chunk)(chunk, size, ctx->cbs.consume_chunk_ctx); + ctx->cur_blob_offset += size; if (ret) return ret; - ctx->cur_stream_offset += size; - if (ctx->cur_stream_offset == ctx->cur_stream->size) { - /* Finished reading all the data for a stream. */ - DEBUG("End stream (size=%"PRIu64").", ctx->cur_stream->size); - ret = (*ctx->cbs.end_stream)(ctx->cur_stream, 0, - ctx->cbs.end_stream_ctx); - if (ret) - return ret; + if (ctx->cur_blob_offset == ctx->cur_blob->size) { + /* Finished reading all the data for a blob. */ - if (ctx->cur_stream != ctx->final_stream) { - /* Advance to next stream. */ - struct list_head *cur, *next; + ctx->cur_blob_offset = 0; - cur = (struct list_head *) - ((u8*)ctx->cur_stream + ctx->list_head_offset); - next = cur->next; - - ctx->cur_stream = (struct wim_lookup_table_entry *) - ((u8*)next - ctx->list_head_offset); + DEBUG("End blob (size=%"PRIu64").", ctx->cur_blob->size); + ret = (*ctx->cbs.end_blob)(ctx->cur_blob, 0, + ctx->cbs.end_blob_ctx); + if (ret) + return ret; - ctx->cur_stream_offset = 0; - } else { - /* No more streams. */ - ctx->cur_stream = NULL; + /* Advance to next blob. */ + ctx->cur_blob = ctx->next_blob; + if (ctx->cur_blob != NULL) { + if (ctx->cur_blob != ctx->final_blob) + ctx->next_blob = next_blob(ctx->cur_blob, + ctx->list_head_offset); + else + ctx->next_blob = NULL; } } return 0; @@ -1028,30 +1000,29 @@ streamifier_cb(const void *chunk, size_t size, void *_ctx) struct hasher_context { SHA_CTX sha_ctx; - struct read_stream_list_callbacks cbs; + int flags; + struct read_blob_list_callbacks cbs; }; -/* Callback for starting to read a stream while calculating its SHA1 message +/* Callback for starting to read a blob while calculating its SHA-1 message * digest. */ static int -hasher_begin_stream(struct wim_lookup_table_entry *lte, bool is_partial_res, - void *_ctx) +hasher_begin_blob(struct blob_descriptor *blob, void *_ctx) { struct hasher_context *ctx = _ctx; sha1_init(&ctx->sha_ctx); - if (ctx->cbs.begin_stream == NULL) + if (ctx->cbs.begin_blob == NULL) return 0; else - return (*ctx->cbs.begin_stream)(lte, is_partial_res, - ctx->cbs.begin_stream_ctx); + return (*ctx->cbs.begin_blob)(blob, ctx->cbs.begin_blob_ctx); } -/* A consume_data_callback_t implementation that continues calculating the SHA1 - * message digest of the stream being read, then optionally passes the data on - * to another consume_data_callback_t implementation. This allows checking the - * SHA1 message digest of a stream being extracted, for example. */ +/* A consume_data_callback_t implementation that continues calculating the SHA-1 + * message digest of the blob being read, then optionally passes the data on to + * another consume_data_callback_t implementation. This allows checking the + * SHA-1 message digest of a blob being extracted, for example. */ static int hasher_consume_chunk(const void *chunk, size_t size, void *_ctx) { @@ -1064,368 +1035,342 @@ hasher_consume_chunk(const void *chunk, size_t size, void *_ctx) return (*ctx->cbs.consume_chunk)(chunk, size, ctx->cbs.consume_chunk_ctx); } -/* Callback for finishing reading a stream while calculating its SHA1 message +/* Callback for finishing reading a blob while calculating its SHA-1 message * digest. */ static int -hasher_end_stream(struct wim_lookup_table_entry *lte, int status, void *_ctx) +hasher_end_blob(struct blob_descriptor *blob, int status, void *_ctx) { struct hasher_context *ctx = _ctx; u8 hash[SHA1_HASH_SIZE]; int ret; if (status) { - /* Error occurred; the full stream may not have been read. */ + /* Error occurred; the full blob may not have been read. */ ret = status; goto out_next_cb; } - /* Retrieve the final SHA1 message digest. */ + /* Retrieve the final SHA-1 message digest. */ sha1_final(hash, &ctx->sha_ctx); - if (lte->unhashed) { - /* No SHA1 message digest was previously present for the stream. - * Set it to the one just calculated. */ - DEBUG("Set SHA1 message digest for stream (size=%"PRIu64").", lte->size); - copy_hash(lte->hash, hash); + if (blob->unhashed) { + if (ctx->flags & COMPUTE_MISSING_BLOB_HASHES) { + /* No SHA-1 message digest was previously present for the + * blob. Set it to the one just calculated. */ + DEBUG("Set SHA-1 message digest for blob " + "(size=%"PRIu64").", blob->size); + copy_hash(blob->hash, hash); + } } else { - /* The stream already had a SHA1 message digest present. Verify - * that it is the same as the calculated value. */ - if (!hashes_equal(hash, lte->hash)) { - if (wimlib_print_errors) { - ERROR("Invalid SHA1 message digest " - "on the following WIM stream:"); - print_lookup_table_entry(lte, stderr); - if (lte->resource_location == RESOURCE_IN_WIM) - ERROR("The WIM file appears to be corrupt!"); + if (ctx->flags & VERIFY_BLOB_HASHES) { + /* The blob already had a SHA-1 message digest present. + * Verify that it is the same as the calculated value. + */ + if (!hashes_equal(hash, blob->hash)) { + if (wimlib_print_errors) { + tchar expected_hashstr[SHA1_HASH_SIZE * 2 + 1]; + tchar actual_hashstr[SHA1_HASH_SIZE * 2 + 1]; + sprint_hash(blob->hash, expected_hashstr); + sprint_hash(hash, actual_hashstr); + ERROR("The data is corrupted!\n" + " (Expected SHA-1=%"TS",\n" + " got SHA-1=%"TS")", + expected_hashstr, actual_hashstr); + } + ret = WIMLIB_ERR_INVALID_RESOURCE_HASH; + errno = EINVAL; + goto out_next_cb; } - ret = WIMLIB_ERR_INVALID_RESOURCE_HASH; - errno = EINVAL; - goto out_next_cb; + DEBUG("SHA-1 message digest okay for " + "blob (size=%"PRIu64").", blob->size); } - DEBUG("SHA1 message digest okay for stream (size=%"PRIu64").", lte->size); } ret = 0; out_next_cb: - if (ctx->cbs.end_stream == NULL) + if (ctx->cbs.end_blob == NULL) return ret; else - return (*ctx->cbs.end_stream)(lte, ret, ctx->cbs.end_stream_ctx); + return (*ctx->cbs.end_blob)(blob, ret, ctx->cbs.end_blob_ctx); } -/* Read the full data of the specified stream, passing the data into the - * specified callbacks (all of which are optional) and either checking or - * computing the SHA1 message digest of the stream. */ static int -read_full_stream_with_sha1(struct wim_lookup_table_entry *lte, - const struct read_stream_list_callbacks *cbs) +read_full_blob_with_cbs(struct blob_descriptor *blob, + const struct read_blob_list_callbacks *cbs) { int ret; - struct hasher_context hasher_ctx = { - .cbs = *cbs, - }; - - ret = hasher_begin_stream(lte, false, &hasher_ctx); + ret = (*cbs->begin_blob)(blob, cbs->begin_blob_ctx); if (ret) return ret; - ret = read_stream_prefix(lte, lte->size, hasher_consume_chunk, - &hasher_ctx, 0); + ret = read_blob_prefix(blob, blob->size, cbs->consume_chunk, + cbs->consume_chunk_ctx); - return hasher_end_stream(lte, ret, &hasher_ctx); + return (*cbs->end_blob)(blob, ret, cbs->end_blob_ctx); } -struct rechunkifier_context { - u8 *buffer; - u32 buffer_filled; - u32 cb_chunk_size; - - const struct data_range *ranges; - size_t num_ranges; - size_t cur_range; - u64 range_bytes_remaining; - - consume_data_callback_t cb; - void *cb_ctx; -}; +/* Read the full data of the specified blob, passing the data into the specified + * callbacks (all of which are optional) and either checking or computing the + * SHA-1 message digest of the blob. */ +static int +read_full_blob_with_sha1(struct blob_descriptor *blob, + const struct read_blob_list_callbacks *cbs) +{ + struct hasher_context hasher_ctx = { + .flags = VERIFY_BLOB_HASHES | COMPUTE_MISSING_BLOB_HASHES, + .cbs = *cbs, + }; + struct read_blob_list_callbacks hasher_cbs = { + .begin_blob = hasher_begin_blob, + .begin_blob_ctx = &hasher_ctx, + .consume_chunk = hasher_consume_chunk, + .consume_chunk_ctx = &hasher_ctx, + .end_blob = hasher_end_blob, + .end_blob_ctx = &hasher_ctx, + }; + return read_full_blob_with_cbs(blob, &hasher_cbs); +} -/* Wrapper callback for adjusting the data chunk size. */ static int -rechunkifier_cb(const void *chunk, size_t size, void *_ctx) +read_blobs_in_solid_resource(struct blob_descriptor *first_blob, + struct blob_descriptor *last_blob, + u64 blob_count, + size_t list_head_offset, + const struct read_blob_list_callbacks *sink_cbs) { - struct rechunkifier_context *ctx = _ctx; - const u8 *chunkptr = chunk; - size_t bytes_to_copy; + struct data_range *ranges; + bool ranges_malloced; + struct blob_descriptor *cur_blob; + size_t i; int ret; + u64 ranges_alloc_size; - wimlib_assert(ctx->cur_range != ctx->num_ranges); + DEBUG("Reading %"PRIu64" blobs combined in same WIM resource", + blob_count); - while (size) { + /* Setup data ranges array (one range per blob to read); this way + * read_compressed_wim_resource() does not need to be aware of blobs. + */ - /* Append more data to the buffer. */ - bytes_to_copy = size; + ranges_alloc_size = blob_count * sizeof(ranges[0]); - if (bytes_to_copy > ctx->cb_chunk_size - ctx->buffer_filled) - bytes_to_copy = ctx->cb_chunk_size - ctx->buffer_filled; + if (unlikely((size_t)ranges_alloc_size != ranges_alloc_size)) { + ERROR("Too many blobs in one resource!"); + return WIMLIB_ERR_NOMEM; + } + if (likely(ranges_alloc_size <= STACK_MAX)) { + ranges = alloca(ranges_alloc_size); + ranges_malloced = false; + } else { + ranges = MALLOC(ranges_alloc_size); + if (ranges == NULL) { + ERROR("Too many blobs in one resource!"); + return WIMLIB_ERR_NOMEM; + } + ranges_malloced = true; + } - if (bytes_to_copy > ctx->range_bytes_remaining - ctx->buffer_filled) - bytes_to_copy = ctx->range_bytes_remaining - ctx->buffer_filled; + for (i = 0, cur_blob = first_blob; + i < blob_count; + i++, cur_blob = next_blob(cur_blob, list_head_offset)) + { + ranges[i].offset = cur_blob->offset_in_res; + ranges[i].size = cur_blob->size; + } - memcpy(&ctx->buffer[ctx->buffer_filled], chunkptr, bytes_to_copy); + struct blobifier_context blobifier_ctx = { + .cbs = *sink_cbs, + .cur_blob = first_blob, + .next_blob = next_blob(first_blob, list_head_offset), + .cur_blob_offset = 0, + .final_blob = last_blob, + .list_head_offset = list_head_offset, + }; - ctx->buffer_filled += bytes_to_copy; - chunkptr += bytes_to_copy; - size -= bytes_to_copy; - ctx->range_bytes_remaining -= bytes_to_copy; + ret = read_compressed_wim_resource(first_blob->rdesc, + ranges, + blob_count, + blobifier_cb, + &blobifier_ctx); - if (ctx->buffer_filled == ctx->cb_chunk_size || - ctx->range_bytes_remaining == 0) - { - /* Maximum chunk size reached, or current range ended. - * Call the next consume_data_callback_t and empty the - * buffer */ - ret = (*ctx->cb)(ctx->buffer, ctx->buffer_filled, ctx->cb_ctx); - if (ret) - return ret; - ctx->buffer_filled = 0; + if (ranges_malloced) + FREE(ranges); - if (ctx->range_bytes_remaining == 0 && - ++ctx->cur_range != ctx->num_ranges) - ctx->range_bytes_remaining = ctx->ranges[ctx->cur_range].size; + if (ret) { + if (blobifier_ctx.cur_blob_offset != 0) { + ret = (*blobifier_ctx.cbs.end_blob) + (blobifier_ctx.cur_blob, + ret, + blobifier_ctx.cbs.end_blob_ctx); } } - return 0; + return ret; } /* - * Read a list of streams, each of which may be in any supported location (e.g. - * in a WIM or in an external file). Unlike read_stream_prefix() or the - * functions which call it, this function optimizes the case where multiple - * streams are packed into a single compressed WIM resource and reads them all - * consecutively, only decompressing the data one time. + * Read a list of blobs, each of which may be in any supported location (e.g. + * in a WIM or in an external file). This function optimizes the case where + * multiple blobs are combined into a single solid compressed WIM resource by + * reading the blobs in sequential order, only decompressing the solid resource + * one time. * - * @stream_list - * List of streams (represented as `struct wim_lookup_table_entry's) to - * read. + * @blob_list + * List of blobs to read. * @list_head_offset - * Offset of the `struct list_head' within each `struct - * wim_lookup_table_entry' that makes up the @stream_list. - * @cb_chunk_size - * Size of chunks to provide to @consume_chunk. For a given stream, all - * the chunks will be this size, except possibly the last which will be the - * remainder. If @ - * @cbs TODO + * Offset of the `struct list_head' within each `struct blob_descriptor' that makes up + * the @blob_list. + * @cbs + * Callback functions to accept the blob data. + * @flags + * Bitwise OR of zero or more of the following flags: + * + * VERIFY_BLOB_HASHES: + * For all blobs being read that have already had SHA-1 message + * digests computed, calculate the SHA-1 message digest of the read + * data and compare it with the previously computed value. If they + * do not match, return WIMLIB_ERR_INVALID_RESOURCE_HASH. + * + * COMPUTE_MISSING_BLOB_HASHES + * For all blobs being read that have not yet had their SHA-1 + * message digests computed, calculate and save their SHA-1 message + * digests. + * + * BLOB_LIST_ALREADY_SORTED + * @blob_list is already sorted in sequential order for reading. + * + * The callback functions are allowed to delete the current blob from the list + * if necessary. * * Returns 0 on success; a nonzero error code on failure. Failure can occur due * to an error reading the data or due to an error status being returned by any * of the callback functions. */ int -read_stream_list(struct list_head *stream_list, - size_t list_head_offset, - u32 cb_chunk_size, - const struct read_stream_list_callbacks *cbs) +read_blob_list(struct list_head *blob_list, + size_t list_head_offset, + const struct read_blob_list_callbacks *cbs, + int flags) { int ret; struct list_head *cur, *next; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; + struct hasher_context *hasher_ctx; + struct read_blob_list_callbacks *sink_cbs; - ret = sort_stream_list_by_sequential_order(stream_list, list_head_offset); - if (ret) - return ret; + if (!(flags & BLOB_LIST_ALREADY_SORTED)) { + ret = sort_blob_list_by_sequential_order(blob_list, list_head_offset); + if (ret) + return ret; + } - for (cur = stream_list->next, next = cur->next; - cur != stream_list; + if (flags & (VERIFY_BLOB_HASHES | COMPUTE_MISSING_BLOB_HASHES)) { + hasher_ctx = alloca(sizeof(*hasher_ctx)); + *hasher_ctx = (struct hasher_context) { + .flags = flags, + .cbs = *cbs, + }; + sink_cbs = alloca(sizeof(*sink_cbs)); + *sink_cbs = (struct read_blob_list_callbacks) { + .begin_blob = hasher_begin_blob, + .begin_blob_ctx = hasher_ctx, + .consume_chunk = hasher_consume_chunk, + .consume_chunk_ctx = hasher_ctx, + .end_blob = hasher_end_blob, + .end_blob_ctx = hasher_ctx, + }; + } else { + sink_cbs = (struct read_blob_list_callbacks*)cbs; + } + + for (cur = blob_list->next, next = cur->next; + cur != blob_list; cur = next, next = cur->next) { - lte = (struct wim_lookup_table_entry*)((u8*)cur - list_head_offset); - - if (lte_is_partial(lte)) { + blob = (struct blob_descriptor*)((u8*)cur - list_head_offset); - struct wim_lookup_table_entry *lte_next, *lte_last; + if (blob->blob_location == BLOB_IN_WIM && + blob->size != blob->rdesc->uncompressed_size) + { + struct blob_descriptor *blob_next, *blob_last; struct list_head *next2; - size_t stream_count; + u64 blob_count; - /* The next stream is a proper sub-sequence of a WIM - * resource. See if there are other streams in the same + /* The next blob is a proper sub-sequence of a WIM + * resource. See if there are other blobs in the same * resource that need to be read. Since - * sort_stream_list_by_sequential_order() sorted the - * streams by offset in the WIM, this can be determined - * by simply scanning forward in the list. */ + * sort_blob_list_by_sequential_order() sorted the blobs + * by offset in the WIM, this can be determined by + * simply scanning forward in the list. */ - lte_last = lte; - stream_count = 1; + blob_last = blob; + blob_count = 1; for (next2 = next; - next2 != stream_list - && (lte_next = (struct wim_lookup_table_entry*) + next2 != blob_list + && (blob_next = (struct blob_descriptor*) ((u8*)next2 - list_head_offset), - lte_next->resource_location == RESOURCE_IN_WIM - && lte_next->rspec == lte->rspec); + blob_next->blob_location == BLOB_IN_WIM + && blob_next->rdesc == blob->rdesc); next2 = next2->next) { - lte_last = lte_next; - stream_count++; + blob_last = blob_next; + blob_count++; } - if (stream_count > 1) { - /* Reading multiple streams combined into a - * single WIM resource. They are in the stream - * list, sorted by offset; @lte specifies the - * first stream in the resource that needs to be - * read and @lte_last specifies the last stream - * in the resource that needs to be read. */ - - DEBUG("Reading %zu streams combined in same " - "WIM resource", stream_count); - + if (blob_count > 1) { + /* Reading multiple blobs combined into a single + * WIM resource. They are in the blob list, + * sorted by offset; @blob specifies the first + * blob in the resource that needs to be read + * and @blob_last specifies the last blob in the + * resource that needs to be read. */ next = next2; - - struct data_range ranges[stream_count]; - - { - struct list_head *next3; - size_t i; - struct wim_lookup_table_entry *lte_cur; - - next3 = cur; - for (i = 0; i < stream_count; i++) { - lte_cur = (struct wim_lookup_table_entry*) - ((u8*)next3 - list_head_offset); - ranges[i].offset = lte_cur->offset_in_res; - ranges[i].size = lte_cur->size; - next3 = next3->next; - } - } - - /* Set up a chain of callbacks. - * - * The first level is the - * streamifier_cb, - * which takes in chunks of data and divides - * them into the constituent streams. - * - * The second level are the SHA1 message digest - * callbacks, which checksum each stream. - * - * rechunkifier_cb handles dividing the read - * data into chunks of maximum size - * @cb_chunk_size. If @cb_chunk_size is 0, then - * this callback is not needed. - * - * Finally, the last level of callbacks are - * @cbs, passed as arguments to this function. - */ - - struct rechunkifier_context *rechunkifier_ctx = NULL; - consume_data_callback_t last_cb; - void *last_cb_ctx; - - if (cb_chunk_size != 0) { - rechunkifier_ctx = alloca(sizeof(*rechunkifier_ctx)); - *rechunkifier_ctx = (struct rechunkifier_context) { - .buffer = MALLOC(cb_chunk_size), - .buffer_filled = 0, - .cb_chunk_size = cb_chunk_size, - .ranges = ranges, - .num_ranges = stream_count, - .cur_range = 0, - .range_bytes_remaining = ranges[0].size, - .cb = cbs->consume_chunk, - .cb_ctx = cbs->consume_chunk_ctx, - }; - - if (rechunkifier_ctx->buffer == NULL) - return WIMLIB_ERR_NOMEM; - last_cb = rechunkifier_cb; - last_cb_ctx = rechunkifier_ctx; - } else { - rechunkifier_ctx = NULL; - last_cb = cbs->consume_chunk; - last_cb_ctx = cbs->consume_chunk_ctx; - } - - struct hasher_context hasher_ctx = { - .cbs = { - .begin_stream = cbs->begin_stream, - .begin_stream_ctx = cbs->begin_stream_ctx, - .consume_chunk = last_cb, - .consume_chunk_ctx = last_cb_ctx, - .end_stream = cbs->end_stream, - .end_stream_ctx = cbs->end_stream_ctx, - }, - }; - - struct streamifier_context streamifier_ctx = { - .cbs = { - .begin_stream = hasher_begin_stream, - .begin_stream_ctx = &hasher_ctx, - .consume_chunk = hasher_consume_chunk, - .consume_chunk_ctx = &hasher_ctx, - .end_stream = hasher_end_stream, - .end_stream_ctx = &hasher_ctx, - }, - .cur_stream = lte, - .cur_stream_offset = 0, - .final_stream = lte_last, - .list_head_offset = list_head_offset, - }; - - ret = read_compressed_wim_resource(lte->rspec, - ranges, - stream_count, - streamifier_cb, - &streamifier_ctx, - false); - if (rechunkifier_ctx != NULL) - FREE(rechunkifier_ctx->buffer); - - if (ret) { - if (streamifier_ctx.cur_stream_offset != 0) { - ret = (*streamifier_ctx.cbs.end_stream) - (streamifier_ctx.cur_stream, - ret, - streamifier_ctx.cbs.end_stream_ctx); - } + ret = read_blobs_in_solid_resource(blob, blob_last, + blob_count, + list_head_offset, + sink_cbs); + if (ret) return ret; - } continue; } } - ret = read_full_stream_with_sha1(lte, cbs); - if (ret > 0) + ret = read_full_blob_with_cbs(blob, sink_cbs); + if (ret && ret != BEGIN_BLOB_STATUS_SKIP_BLOB) return ret; } return 0; } -/* Extract the first @size bytes of the specified stream. +/* + * Extract the first @size bytes of the specified blob. * - * If @size specifies the full uncompressed size of the stream, then the SHA1 - * message digest of the uncompressed stream is checked while being extracted. + * If @size specifies the full uncompressed size of the blob, then the SHA-1 + * message digest of the uncompressed blob is checked while being extracted. * - * The uncompressed data of the resource is passed in chunks of unspecified size - * to the @extract_chunk function, passing it @extract_chunk_arg. */ + * The uncompressed data of the blob is passed in chunks of unspecified size to + * the @extract_chunk function, passing it @extract_chunk_arg. + */ int -extract_stream(struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t extract_chunk, void *extract_chunk_arg) +extract_blob(struct blob_descriptor *blob, u64 size, + consume_data_callback_t extract_chunk, void *extract_chunk_arg) { - if (size == lte->size) { - /* Do SHA1. */ - struct read_stream_list_callbacks cbs = { + wimlib_assert(size <= blob->size); + if (size == blob->size) { + /* Do SHA-1. */ + struct read_blob_list_callbacks cbs = { .consume_chunk = extract_chunk, .consume_chunk_ctx = extract_chunk_arg, }; - return read_full_stream_with_sha1(lte, &cbs); + return read_full_blob_with_sha1(blob, &cbs); } else { - /* Don't do SHA1. */ - return read_stream_prefix(lte, size, extract_chunk, - extract_chunk_arg, 0); + /* Don't do SHA-1. */ + return read_blob_prefix(blob, size, extract_chunk, + extract_chunk_arg); } } /* A consume_data_callback_t implementation that writes the chunk of data to a * file descriptor. */ -int +static int extract_chunk_to_fd(const void *chunk, size_t size, void *_fd_p) { struct filedes *fd = _fd_p; @@ -1438,49 +1383,66 @@ extract_chunk_to_fd(const void *chunk, size_t size, void *_fd_p) return 0; } -/* Extract the first @size bytes of the specified stream to the specified file +/* Extract the first @size bytes of the specified blob to the specified file * descriptor. */ int -extract_stream_to_fd(struct wim_lookup_table_entry *lte, - struct filedes *fd, u64 size) +extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd, u64 size) +{ + return extract_blob(blob, size, extract_chunk_to_fd, fd); +} + +/* Extract the full uncompressed contents of the specified blob to the specified + * file descriptor. */ +int +extract_full_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd) { - return extract_stream(lte, size, extract_chunk_to_fd, fd); + return extract_blob_to_fd(blob, fd, blob->size); } -/* Calculate the SHA1 message digest of a stream and store it in @lte->hash. */ +/* Calculate the SHA-1 message digest of a blob and store it in @blob->hash. */ int -sha1_stream(struct wim_lookup_table_entry *lte) +sha1_blob(struct blob_descriptor *blob) { - wimlib_assert(lte->unhashed); - struct read_stream_list_callbacks cbs = { + wimlib_assert(blob->unhashed); + struct read_blob_list_callbacks cbs = { }; - return read_full_stream_with_sha1(lte, &cbs); + return read_full_blob_with_sha1(blob, &cbs); } -/* Convert a short WIM resource header to a stand-alone WIM resource - * specification. */ +/* + * Convert a short WIM resource header to a stand-alone WIM resource descriptor. + * + * Note: for solid resources some fields still need to be overridden. + */ void -wim_res_hdr_to_spec(const struct wim_reshdr *reshdr, WIMStruct *wim, - struct wim_resource_spec *spec) +wim_res_hdr_to_desc(const struct wim_reshdr *reshdr, WIMStruct *wim, + struct wim_resource_descriptor *rdesc) { - spec->wim = wim; - spec->offset_in_wim = reshdr->offset_in_wim; - spec->size_in_wim = reshdr->size_in_wim; - spec->uncompressed_size = reshdr->uncompressed_size; - INIT_LIST_HEAD(&spec->stream_list); - spec->flags = reshdr->flags; - spec->is_pipable = wim_is_pipable(wim); + rdesc->wim = wim; + rdesc->offset_in_wim = reshdr->offset_in_wim; + rdesc->size_in_wim = reshdr->size_in_wim; + rdesc->uncompressed_size = reshdr->uncompressed_size; + INIT_LIST_HEAD(&rdesc->blob_list); + rdesc->flags = reshdr->flags; + rdesc->is_pipable = wim_is_pipable(wim); + if (rdesc->flags & WIM_RESHDR_FLAG_COMPRESSED) { + rdesc->compression_type = wim->compression_type; + rdesc->chunk_size = wim->chunk_size; + } else { + rdesc->compression_type = WIMLIB_COMPRESSION_TYPE_NONE; + rdesc->chunk_size = 0; + } } -/* Convert a stand-alone resource specification to a WIM resource header. */ +/* Convert a stand-alone resource descriptor to a WIM resource header. */ void -wim_res_spec_to_hdr(const struct wim_resource_spec *rspec, +wim_res_desc_to_hdr(const struct wim_resource_descriptor *rdesc, struct wim_reshdr *reshdr) { - reshdr->offset_in_wim = rspec->offset_in_wim; - reshdr->size_in_wim = rspec->size_in_wim; - reshdr->flags = rspec->flags; - reshdr->uncompressed_size = rspec->uncompressed_size; + reshdr->offset_in_wim = rdesc->offset_in_wim; + reshdr->size_in_wim = rdesc->size_in_wim; + reshdr->flags = rdesc->flags; + reshdr->uncompressed_size = rdesc->uncompressed_size; } /* Translates a WIM resource header from the on-disk format into an in-memory @@ -1499,7 +1461,6 @@ get_wim_reshdr(const struct wim_reshdr_disk *disk_reshdr, ((u64)disk_reshdr->size_in_wim[6] << 48)); reshdr->uncompressed_size = le64_to_cpu(disk_reshdr->uncompressed_size); reshdr->flags = disk_reshdr->flags; - return 0; } /* Translates a WIM resource header from an in-memory format into the on-disk