4 * Read uncompressed and compressed metadata and file resources from a WIM file.
8 * Copyright (C) 2012, 2013 Eric Biggers
10 * This file is part of wimlib, a library for working with WIM files.
12 * wimlib is free software; you can redistribute it and/or modify it under the
13 * terms of the GNU General Public License as published by the Free Software
14 * Foundation; either version 3 of the License, or (at your option) any later
17 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License along with
22 * wimlib; if not, see http://www.gnu.org/licenses/.
30 #include "wimlib/endianness.h"
31 #include "wimlib/error.h"
32 #include "wimlib/file_io.h"
33 #include "wimlib/lookup_table.h"
34 #include "wimlib/resource.h"
35 #include "wimlib/sha1.h"
38 /* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */
39 # include "wimlib/win32.h"
43 /* for read_ntfs_file_prefix() */
44 # include "wimlib/ntfs_3g.h"
56 * Compressed WIM resources
58 * A compressed resource in a WIM consists of a number of compressed chunks,
59 * each of which decompresses to a fixed chunk size (given in the WIM header;
60 * usually 32768) except possibly the last, which always decompresses to any
61 * remaining bytes. In addition, immediately before the chunks, a table (the
62 * "chunk table") provides the offset, in bytes relative to the end of the chunk
63 * table, of the start of each compressed chunk, except for the first chunk
64 * which is omitted as it always has an offset of 0. Therefore, a compressed
65 * resource with N chunks will have a chunk table with N - 1 entries.
67 * Additional information:
69 * - Entries in the chunk table are 4 bytes each, except if the uncompressed
70 * size of the resource is greater than 4 GiB, in which case the entries in
71 * the chunk table are 8 bytes each. In either case, the entries are unsigned
72 * little-endian integers.
74 * - The chunk table is included in the compressed size of the resource provided
75 * in the corresponding entry in the WIM's stream lookup table.
77 * - The compressed size of a chunk is never greater than the uncompressed size.
78 * From the compressor's point of view, chunks that would have compressed to a
79 * size greater than or equal to their original size are in fact stored
80 * uncompressed. From the decompressor's point of view, chunks with
81 * compressed size equal to their uncompressed size are in fact uncompressed.
83 * Furthermore, wimlib supports its own "pipable" WIM format, and for this the
84 * structure of compressed resources was modified to allow piped reading and
85 * writing. To make sequential writing possible, the chunk table is placed
86 * after the chunks rather than before the chunks, and to make sequential
87 * reading possible, each chunk is prefixed with a 4-byte header giving its
88 * compressed size as a 32-bit, unsigned, little-endian integer. Otherwise the
89 * details are the same.
93 /* Decompress the specified chunk that uses the specified compression type
94 * @ctype, part of a WIM with default chunk size @wim_chunk_size. For LZX the
95 * separate @wim_chunk_size is needed because it determines the window size used
96 * for LZX compression. */
/* @cchunk and @clen are handed to the selected decompressor as its input
 * buffer and input length; @uchunk and @ulen are handed to it as its output
 * buffer and output length.  The value returned by the selected decompressor
 * is returned to the caller unchanged. */
98 decompress(const void *cchunk, unsigned clen, void *uchunk, unsigned ulen,
99 int ctype, u32 wim_chunk_size)
/* LZX is the only visible case that also receives @wim_chunk_size. */
102 case WIMLIB_COMPRESSION_TYPE_LZX:
103 return wimlib_lzx_decompress2(cchunk, clen,
104 uchunk, ulen, wim_chunk_size);
105 case WIMLIB_COMPRESSION_TYPE_XPRESS:
106 return wimlib_xpress_decompress(cchunk, clen,
108 case WIMLIB_COMPRESSION_TYPE_LZMS:
109 return wimlib_lzms_decompress(cchunk, clen, uchunk, ulen);
116 /* Read data from a compressed WIM resource. Assumes parameters were already
117 * verified by read_partial_wim_resource(). */
/* Arguments, as they are used in the code below:
 *   @rspec          Resource to read; its ctype, cchunk_size, size_in_wim,
 *                   uncompressed_size, offset_in_wim, is_pipable and
 *                   wim->in_fd members are consulted.
 *   @size           Number of uncompressed bytes requested.
 *   @cb             Consumer callback, invoked as cb(data, len, ctx_or_buf).
 *   @cb_chunk_size  Upper bound on the length handed to @cb per call, except
 *                   in raw-chunks mode where each chunk is passed whole.
 *   @ctx_or_buf     Context pointer forwarded to @cb, or the destination
 *                   buffer when the data is copied out directly.
 *   @flags          Only WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS is tested here.
 *   @offset         Offset of the first requested byte within the
 *                   uncompressed resource.
 * Error codes assigned directly in this function are WIMLIB_ERR_DECOMPRESSION
 * and WIMLIB_ERR_NOMEM; results of full_pread() and of @cb are also stored in
 * ret. */
119 read_compressed_wim_resource(const struct wim_resource_spec * const rspec,
120 const u64 size, const consume_data_callback_t cb,
121 const u32 cb_chunk_size, void * const ctx_or_buf,
122 const int flags, const u64 offset)
127 const u32 orig_chunk_size = rspec->cchunk_size;
/* The chunk size is asserted to be a power of 2 just below, so the code later
 * shifts by this order and masks with (orig_chunk_size - 1) instead of
 * dividing.  Presumably bsr32() yields the index of the highest set bit --
 * TODO confirm. */
128 const u32 orig_chunk_order = bsr32(orig_chunk_size);
130 wimlib_assert(is_power_of_2(orig_chunk_size));
132 /* Handle the trivial case. */
/* LZMS resources take a separate path: clen bytes at rspec->offset_in_wim are
 * read into cbuf, decompressed into ubuf, and then the range
 * ubuf[offset .. offset + size) is either fed to @cb in pieces of at most
 * @cb_chunk_size bytes or copied into @ctx_or_buf with memcpy().
 *
 * NOTE(review): points to confirm against the complete source:
 *  - clen and ulen are plain `unsigned` but are initialised from
 *    rspec->size_in_wim and rspec->uncompressed_size, which other code in
 *    this file formats as 64-bit values; large resources would be truncated.
 *  - The fprintf() to stderr looks like leftover debugging output, and its
 *    "%lu" conversion is paired with rspec->offset_in_wim, which other code
 *    in this file prints with PRIu64.
 *  - No NULL check after the two MALLOC() calls and no FREE() of cbuf or
 *    ubuf is visible in this branch. */
136 if (rspec->ctype == WIMLIB_COMPRESSION_TYPE_LZMS) {
139 unsigned clen = rspec->size_in_wim;
140 unsigned ulen = rspec->uncompressed_size;
141 unsigned lzms_offset;
143 fprintf(stderr, "clen=%u, ulen=%u, offset=%lu\n", clen, ulen,
144 rspec->offset_in_wim);
146 u8 *cbuf = MALLOC(clen);
147 u8 *ubuf = MALLOC(ulen);
150 ret = full_pread(&rspec->wim->in_fd,
151 cbuf, clen, rspec->offset_in_wim);
153 ERROR_WITH_ERRNO("Can't read compressed data");
157 if (clen <= rspec->cchunk_size)
162 ret = wimlib_lzms_decompress(cbuf + lzms_offset,
166 ERROR("LZMS decompression error.");
168 ret = WIMLIB_ERR_DECOMPRESSION;
173 for (u64 i = offset; i < offset + size; i += chunk_size) {
174 chunk_size = min(offset + size - i, cb_chunk_size);
175 ret = cb(&ubuf[i], chunk_size, ctx_or_buf);
180 memcpy(ctx_or_buf, &ubuf[offset], size);
/* General path.  Each working buffer below may come from alloca() or from
 * MALLOC(); the matching *_malloced flag records the heap case so that the
 * cleanup code near the end of the function knows what to release. */
189 u64 *chunk_offsets = NULL;
192 void *compressed_buf = NULL;
193 bool chunk_offsets_malloced = false;
194 bool out_buf_malloced = false;
195 bool tmp_buf_malloced = false;
196 bool compressed_buf_malloced = false;
198 /* Get the file descriptor for the WIM. */
199 struct filedes * const in_fd = &rspec->wim->in_fd;
201 /* Determine if we're reading a pipable resource from a pipe or not. */
202 const bool is_pipe_read = !filedes_is_seekable(in_fd);
204 /* Calculate the number of chunks the resource is divided into. */
205 const u64 num_chunks = (rspec->uncompressed_size + orig_chunk_size - 1) >> orig_chunk_order;
207 /* Calculate the 0-based index of the chunk at which the read starts.
209 const u64 start_chunk = offset >> orig_chunk_order;
211 /* For pipe reads, we always must start from the 0th chunk. */
212 const u64 actual_start_chunk = (is_pipe_read ? 0 : start_chunk);
214 /* Calculate the offset, within the start chunk, of the first byte of
216 const u32 start_offset_in_chunk = offset & (orig_chunk_size - 1);
218 /* Calculate the index of the chunk that contains the last byte of the
220 const u64 end_chunk = (offset + size - 1) >> orig_chunk_order;
222 /* Calculate the offset, within the end chunk, of the last byte of the
224 const u32 end_offset_in_chunk = (offset + size - 1) & (orig_chunk_size - 1);
226 /* Calculate the number of entries in the chunk table; it's one less
227 * than the number of chunks, since the first chunk has no entry. */
228 const u64 num_chunk_entries = num_chunks - 1;
230 /* Set the size of each chunk table entry based on the resource's
231 * uncompressed size. */
/* The comparison is strict: a resource of exactly 1ULL << 32 bytes still
 * uses 4-byte entries. */
232 const u64 chunk_entry_size = (rspec->uncompressed_size > (1ULL << 32)) ? 8 : 4;
234 /* Calculate the size, in bytes, of the full chunk table. */
235 const u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
237 /* Current offset to read from. */
238 u64 cur_read_offset = rspec->offset_in_wim;
240 /* Read the chunk table into memory. */
242 /* Calculate the number of chunk entries are actually needed to
243 * read the requested part of the resource. Include an entry
244 * for the first chunk even though that doesn't exist in the
245 * on-disk table, but take into account that if the last chunk
246 * required for the read is not the last chunk of the resource,
247 * an extra chunk entry is needed so that the compressed size of
248 * the last chunk of the read can be determined. */
249 const u64 num_alloc_chunk_entries = end_chunk - start_chunk +
250 1 + (end_chunk != num_chunks - 1);
252 /* Allocate a buffer to hold a subset of the chunk table. It
253 * will only contain offsets for the chunks that are actually
254 * needed for this read. For speed, allocate the buffer on the
255 * stack unless it's too large. */
/* Detect a byte count that does not survive conversion to size_t (possible
 * where size_t is narrower than u64). */
256 if ((size_t)(num_alloc_chunk_entries * sizeof(u64)) !=
257 (num_alloc_chunk_entries * sizeof(u64)))
260 if (num_alloc_chunk_entries <= STACK_MAX / sizeof(u64)) {
261 chunk_offsets = alloca(num_alloc_chunk_entries * sizeof(u64));
263 chunk_offsets = MALLOC(num_alloc_chunk_entries * sizeof(u64));
264 if (chunk_offsets == NULL)
266 chunk_offsets_malloced = true;
269 /* Set the implicit offset of the first chunk if it's included
270 * in the needed chunks. */
271 if (start_chunk == 0)
272 chunk_offsets[0] = 0;
274 /* Calculate the index of the first needed entry in the chunk
276 const u64 start_table_idx = (start_chunk == 0) ?
279 /* Calculate the number of entries that need to be read from the
281 const u64 num_needed_chunk_entries = (start_chunk == 0) ?
282 num_alloc_chunk_entries - 1 : num_alloc_chunk_entries;
284 /* Calculate the number of bytes of data that need to be read
285 * from the chunk table. */
286 const size_t chunk_table_needed_size =
287 num_needed_chunk_entries * chunk_entry_size;
289 /* Calculate the byte offset, in the WIM file, of the first
290 * chunk table entry to read. Take into account that if the WIM
291 * file is in the special "pipable" format, then the chunk table
292 * is at the end of the resource, not the beginning. */
293 const u64 file_offset_of_needed_chunk_entries =
295 + (start_table_idx * chunk_entry_size)
296 + (rspec->is_pipable ? (rspec->size_in_wim - chunk_table_size) : 0);
298 /* Read the needed chunk table entries into the end of the
299 * chunk_offsets buffer. */
300 void * const chunk_tab_data = (u8*)&chunk_offsets[num_alloc_chunk_entries] -
301 chunk_table_needed_size;
302 ret = full_pread(in_fd, chunk_tab_data, chunk_table_needed_size,
303 file_offset_of_needed_chunk_entries);
307 /* Now fill in chunk_offsets from the entries we have read in
308 * chunk_tab_data. Careful: chunk_offsets aliases
309 * chunk_tab_data, which breaks C's aliasing rules when we read
310 * 32-bit integers and store 64-bit integers. But since the
311 * operations are safe as long as the compiler doesn't mess with
312 * their order, we use the gcc may_alias extension to tell the
313 * compiler that loads from the 32-bit integers may alias stores
314 * to the 64-bit integers. */
316 typedef le64 __attribute__((may_alias)) aliased_le64_t;
317 typedef le32 __attribute__((may_alias)) aliased_le32_t;
318 u64 * const chunk_offsets_p = chunk_offsets + (start_chunk == 0);
321 if (chunk_entry_size == 4) {
322 aliased_le32_t *raw_entries = (aliased_le32_t*)chunk_tab_data;
323 for (i = 0; i < num_needed_chunk_entries; i++)
324 chunk_offsets_p[i] = le32_to_cpu(raw_entries[i]);
326 aliased_le64_t *raw_entries = (aliased_le64_t*)chunk_tab_data;
327 for (i = 0; i < num_needed_chunk_entries; i++)
328 chunk_offsets_p[i] = le64_to_cpu(raw_entries[i]);
332 /* Set offset to beginning of first chunk to read. */
333 cur_read_offset += chunk_offsets[0];
334 if (rspec->is_pipable)
335 cur_read_offset += start_chunk * sizeof(struct pwm_chunk_hdr);
337 cur_read_offset += chunk_table_size;
340 /* If using a callback function, allocate a temporary buffer that will
341 * hold data being passed to it. If writing directly to a buffer
342 * instead, arrange to write data directly into it. */
344 u8 *out_buf_end, *out_p;
346 out_buf_size = max(cb_chunk_size, orig_chunk_size);
347 if (out_buf_size <= STACK_MAX) {
348 out_buf = alloca(out_buf_size);
350 out_buf = MALLOC(out_buf_size);
353 out_buf_malloced = true;
357 out_buf = ctx_or_buf;
359 out_buf_end = out_buf + out_buf_size;
362 /* Unless the raw compressed data was requested, allocate a temporary
363 * buffer for reading compressed chunks, each of which can be at most
364 * @orig_chunk_size - 1 bytes. This excludes compressed chunks that are
365 * a full @orig_chunk_size bytes, which are actually stored
367 if (!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS)) {
368 if (orig_chunk_size - 1 <= STACK_MAX) {
369 compressed_buf = alloca(orig_chunk_size - 1);
371 compressed_buf = MALLOC(orig_chunk_size - 1);
372 if (compressed_buf == NULL)
374 compressed_buf_malloced = true;
378 /* Allocate yet another temporary buffer, this one for decompressing
379 * chunks for which only part of the data is needed. */
380 if (start_offset_in_chunk != 0 ||
381 (end_offset_in_chunk != orig_chunk_size - 1 &&
382 offset + size != rspec->uncompressed_size))
384 if (orig_chunk_size <= STACK_MAX) {
385 tmp_buf = alloca(orig_chunk_size);
387 tmp_buf = MALLOC(orig_chunk_size);
390 tmp_buf_malloced = true;
394 /* Read, and possibly decompress, each needed chunk, either writing the
395 * data directly into the @ctx_or_buf buffer or passing it to the @cb
396 * callback function. */
397 for (u64 i = actual_start_chunk; i <= end_chunk; i++) {
399 /* Calculate uncompressed size of next chunk. */
401 if ((i == num_chunks - 1) && (rspec->uncompressed_size & (orig_chunk_size - 1)))
402 chunk_usize = (rspec->uncompressed_size & (orig_chunk_size - 1));
404 chunk_usize = orig_chunk_size;
406 /* Calculate compressed size of next chunk. */
409 struct pwm_chunk_hdr chunk_hdr;
411 ret = full_pread(in_fd, &chunk_hdr,
412 sizeof(chunk_hdr), cur_read_offset);
415 chunk_csize = le32_to_cpu(chunk_hdr.compressed_size);
417 if (i == num_chunks - 1) {
418 chunk_csize = rspec->size_in_wim -
420 chunk_offsets[i - start_chunk];
421 if (rspec->is_pipable)
422 chunk_csize -= num_chunks * sizeof(struct pwm_chunk_hdr);
424 chunk_csize = chunk_offsets[i + 1 - start_chunk] -
425 chunk_offsets[i - start_chunk];
/* A stored chunk may be neither empty nor larger than the data it expands
 * to; anything else is reported as a decompression error. */
428 if (chunk_csize == 0 || chunk_csize > chunk_usize) {
429 ERROR("Invalid chunk size in compressed resource!");
431 ret = WIMLIB_ERR_DECOMPRESSION;
432 goto out_free_memory;
434 if (rspec->is_pipable)
435 cur_read_offset += sizeof(struct pwm_chunk_hdr);
437 if (i >= start_chunk) {
438 /* Calculate how much of this chunk needs to be read. */
439 u32 chunk_needed_size;
440 u32 start_offset = 0;
441 u32 end_offset = orig_chunk_size - 1;
443 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
444 chunk_needed_size = chunk_csize;
446 if (i == start_chunk)
447 start_offset = start_offset_in_chunk;
450 end_offset = end_offset_in_chunk;
452 chunk_needed_size = end_offset + 1 - start_offset;
/* Chunks whose stored size equals their uncompressed size, and every chunk
 * in raw-chunks mode, are read without being decompressed. */
455 if (chunk_csize == chunk_usize ||
456 (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS))
458 /* Read the raw chunk data. */
460 ret = full_pread(in_fd,
463 cur_read_offset + start_offset);
467 /* Read and decompress the chunk. */
471 ret = full_pread(in_fd,
478 if (chunk_needed_size == chunk_usize)
483 ret = decompress(compressed_buf,
490 ERROR("Failed to decompress data!");
491 ret = WIMLIB_ERR_DECOMPRESSION;
493 goto out_free_memory;
495 if (chunk_needed_size != chunk_usize)
496 memcpy(out_p, tmp_buf + start_offset,
500 out_p += chunk_needed_size;
503 /* Feed the data to the callback function. */
505 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
506 ret = cb(out_buf, out_p - out_buf, ctx_or_buf);
508 goto out_free_memory;
510 } else if (i == end_chunk || out_p == out_buf_end) {
514 for (p = out_buf; p != out_p; p += bytes_sent) {
515 bytes_sent = min(cb_chunk_size, out_p - p);
516 ret = cb(p, bytes_sent, ctx_or_buf);
518 goto out_free_memory;
523 cur_read_offset += chunk_csize;
527 /* Skip data only. */
528 cur_read_offset += chunk_csize;
/* Only the last byte of the skipped chunk is requested.  Presumably
 * full_pread() consumes the intervening bytes itself when in_fd is not
 * seekable -- TODO confirm in the file I/O code. */
529 ret = full_pread(in_fd, &dummy, 1, cur_read_offset - 1);
536 && size == rspec->uncompressed_size
540 /* Skip chunk table at end of pipable resource. */
542 cur_read_offset += chunk_table_size;
543 ret = full_pread(in_fd, &dummy, 1, cur_read_offset - 1);
550 if (chunk_offsets_malloced)
552 if (out_buf_malloced)
554 if (compressed_buf_malloced)
555 FREE(compressed_buf);
556 if (tmp_buf_malloced)
/* Error exits: report the problem, set ret where needed, and jump back to
 * the common cleanup at out_free_memory. */
562 ERROR("Not enough memory available to read size=%"PRIu64" bytes "
563 "from compressed resource!", size);
565 ret = WIMLIB_ERR_NOMEM;
566 goto out_free_memory;
569 ERROR_WITH_ERRNO("Error reading compressed file resource!");
570 goto out_free_memory;
573 /* Read raw data from a file descriptor at the specified offset. */
/* Two delivery modes are visible below: either the data is read through a
 * temporary buffer of @cb_chunk_size bytes and handed to @cb piece by piece,
 * or all @size bytes are read into @ctx_or_buf with one full_pread() call.
 * Presumably the mode is selected by whether @cb is NULL -- TODO confirm.
 * WIMLIB_ERR_NOMEM is assigned when the temporary buffer cannot be
 * allocated; results of full_pread() and @cb are also stored in ret. */
575 read_raw_file_data(struct filedes *in_fd, u64 size, consume_data_callback_t cb,
576 u32 cb_chunk_size, void *ctx_or_buf, u64 offset)
580 bool tmp_buf_malloced = false;
583 /* Send data to callback function in chunks. */
/* The temporary buffer lives on the stack when it is at most STACK_MAX
 * bytes; otherwise it is heap-allocated and flagged for release later. */
584 if (cb_chunk_size <= STACK_MAX) {
585 tmp_buf = alloca(cb_chunk_size);
587 tmp_buf = MALLOC(cb_chunk_size);
588 if (tmp_buf == NULL) {
589 ret = WIMLIB_ERR_NOMEM;
592 tmp_buf_malloced = true;
/* Each pass reads at most @cb_chunk_size bytes, hands them to @cb, then
 * advances @offset and shrinks @size by the amount just read. */
596 size_t bytes_to_read = min(cb_chunk_size, size);
597 ret = full_pread(in_fd, tmp_buf, bytes_to_read,
601 ret = cb(tmp_buf, bytes_to_read, ctx_or_buf);
604 size -= bytes_to_read;
605 offset += bytes_to_read;
608 /* Read data directly into buffer. */
609 ret = full_pread(in_fd, ctx_or_buf, size, offset);
617 ERROR_WITH_ERRNO("Read error");
619 if (tmp_buf_malloced)
625 * read_partial_wim_resource()-
627 * Read a range of data from an uncompressed or compressed resource in a WIM
628 * file. Data is written into a buffer or fed into a callback function, as
629 * documented in read_stream_prefix().
631 * By default, this function provides the uncompressed data of the resource, and
632 * @size and @offset are interpreted relative to the uncompressed contents of
633 * the resource. This behavior can be modified by either of the following flags:
636 * WIMLIB_READ_RESOURCE_FLAG_RAW_FULL:
637 * Read @size bytes at @offset of the raw contents of the compressed
638 * resource. In the case of pipable resources, this excludes the stream
639 * header. Exclusive with WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS.
641 * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS:
642 * Read the raw compressed chunks of the compressed resource. @size must
643 * be the full uncompressed size, @offset must be 0, and @cb_chunk_size
644 * must be the resource chunk size.
647 * WIMLIB_ERR_SUCCESS (0)
648 * WIMLIB_ERR_READ (errno set)
649 * WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to 0)
650 * WIMLIB_ERR_NOMEM (errno set to ENOMEM)
651 * WIMLIB_ERR_DECOMPRESSION (errno set to EINVAL)
653 * or other error code returned by the @cb function.
/* Validates the request with wimlib_assert() and then dispatches it: raw-full
 * reads and uncompressed resources go to read_raw_file_data(), everything
 * else goes to read_compressed_wim_resource(). */
656 read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
657 u64 size, consume_data_callback_t cb,
658 u32 cb_chunk_size, void *ctx_or_buf,
659 int flags, u64 offset)
661 const struct wim_resource_spec *rspec;
662 struct filedes *in_fd;
664 /* Verify parameters. */
665 wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
667 in_fd = &rspec->wim->in_fd;
/* NOTE(review): read_partial_wim_stream_into_buf() calls this function with
 * cb == NULL and cb_chunk_size == 0.  Confirm that this assertion is only
 * evaluated when a callback is supplied. */
669 wimlib_assert(is_power_of_2(cb_chunk_size));
670 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
671 /* Raw chunks mode is subject to the restrictions noted. */
672 wimlib_assert(!lte_is_partial(lte));
673 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL));
674 wimlib_assert(cb_chunk_size == rspec->cchunk_size);
675 wimlib_assert(size == rspec->uncompressed_size);
676 wimlib_assert(offset == 0);
677 } else if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) {
678 /* Raw full mode: read must not overrun end of store size. */
679 wimlib_assert(!lte_is_partial(lte));
/* The first clause, offset + size >= size, rejects wrap-around of the
 * 64-bit sum before it is compared against the limit. */
680 wimlib_assert(offset + size >= size &&
681 offset + size <= rspec->size_in_wim);
683 /* Normal mode: read must not overrun end of original size. */
684 wimlib_assert(offset + size >= size &&
685 lte->offset_in_res + offset + size <= rspec->uncompressed_size);
688 DEBUG("Reading WIM resource: %"PRIu64" @ +%"PRIu64"[+%"PRIu64"] "
689 "from %"PRIu64"(%"PRIu64") @ +%"PRIu64" "
690 "(readflags 0x%08x, resflags 0x%02x%s)",
691 size, offset, lte->offset_in_res,
693 rspec->uncompressed_size,
694 rspec->offset_in_wim,
696 (rspec->is_pipable ? ", pipable" : ""));
/* Raw reads are positioned relative to the resource's start in the WIM file
 * (rspec->offset_in_wim).  Compressed reads are positioned within the
 * uncompressed resource, shifted by the stream's lte->offset_in_res. */
698 if ((flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) ||
699 rspec->ctype == WIMLIB_COMPRESSION_TYPE_NONE)
701 return read_raw_file_data(in_fd,
706 offset + rspec->offset_in_wim);
708 return read_compressed_wim_resource(rspec, size, cb,
710 ctx_or_buf, flags, offset + lte->offset_in_res);
/* Read @size bytes starting at @offset of the stream @lte into @buf.  This
 * forwards to read_partial_wim_resource() with no callback (cb == NULL,
 * cb_chunk_size == 0) and no flags, and returns its result. */
715 read_partial_wim_stream_into_buf(const struct wim_lookup_table_entry *lte,
716 size_t size, u64 offset, void *buf)
718 return read_partial_wim_resource(lte, size, NULL, 0, buf, 0, offset);
/* Read the first @size bytes of a stream located in a WIM file.  All
 * arguments are forwarded unchanged to read_partial_wim_resource() with a
 * starting offset of 0; its result is returned.  The parameter list matches
 * the other entries in the handler table of read_stream_prefix(). */
722 read_wim_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size,
723 consume_data_callback_t cb, u32 cb_chunk_size,
724 void *ctx_or_buf, int flags)
726 return read_partial_wim_resource(lte, size, cb, cb_chunk_size,
727 ctx_or_buf, flags, 0);
731 /* This function handles reading stream data that is located in an external
732 * file, such as a file that has been added to the WIM image through execution
733 * of a wimlib_add_command.
735 * This assumes the file can be accessed using the standard POSIX open(),
736 * read(), and close(). On Windows this will not necessarily be the case (since
737 * the file may need FILE_FLAG_BACKUP_SEMANTICS to be opened, or the file may be
738 * encrypted), so Windows uses its own code for its equivalent case.
/* Steps visible below: assert that @size does not exceed lte->size, open
 * lte->file_on_disk read-only (with O_BINARY), wrap the descriptor in a
 * struct filedes, and let read_raw_file_data() deliver @size bytes starting
 * at file offset 0.  WIMLIB_ERR_OPEN is returned when the file cannot be
 * opened.  The flags argument is not used. */
741 read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte, u64 size,
742 consume_data_callback_t cb, u32 cb_chunk_size,
743 void *ctx_or_buf, int _ignored_flags)
749 wimlib_assert(size <= lte->size);
750 DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"", size, lte->file_on_disk);
752 raw_fd = open(lte->file_on_disk, O_BINARY | O_RDONLY);
754 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", lte->file_on_disk);
755 return WIMLIB_ERR_OPEN;
757 filedes_init(&fd, raw_fd);
758 ret = read_raw_file_data(&fd, size, cb, cb_chunk_size, ctx_or_buf, 0);
762 #endif /* !__WIN32__ */
764 /* This function handles the trivial case of reading stream data that is, in
765 * fact, already located in an in-memory buffer. */
/* The source is lte->attached_buffer and @size must not exceed lte->size
 * (asserted).  The data is either passed to @cb in pieces of at most
 * @cb_chunk_size bytes or copied into @ctx_or_buf with a single memcpy().
 * The flags argument is not used. */
767 read_buffer_prefix(const struct wim_lookup_table_entry *lte,
768 u64 size, consume_data_callback_t cb,
769 u32 cb_chunk_size, void *ctx_or_buf, int _ignored_flags)
771 wimlib_assert(size <= lte->size);
774 /* Feed the data into the callback function in
775 * appropriately-sized chunks. */
/* chunk_size is recomputed on every pass, so the final piece may be shorter
 * than @cb_chunk_size. */
779 for (u64 offset = 0; offset < size; offset += chunk_size) {
780 chunk_size = min(cb_chunk_size, size - offset);
781 ret = cb((const u8*)lte->attached_buffer + offset,
782 chunk_size, ctx_or_buf);
787 /* Copy the data directly into the specified buffer. */
788 memcpy(ctx_or_buf, lte->attached_buffer, size);
/* Function-pointer type shared by the per-location readers that
 * read_stream_prefix() selects from its handler table. */
793 typedef int (*read_stream_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
794 u64 size, consume_data_callback_t cb,
795 u32 cb_chunk_size, void *ctx_or_buf,
799 * read_stream_prefix()-
801 * Reads the first @size bytes from a generic "stream", which may be located in
802 * any one of several locations, such as in a WIM file (compressed or
803 * uncompressed), in an external file, or directly in an in-memory buffer.
805 * This function feeds the data either to a callback function (@cb != NULL,
806 * passing it @ctx_or_buf), or writes it directly into a buffer (@cb == NULL,
807 * @ctx_or_buf specifies the buffer, which must have room for at least @size
808 * bytes).
811 * to feed the callback function. @cb_chunk_size must be positive, and if the
812 * stream is in a WIM file, must be a power of 2. All chunks, except possibly
813 * the last one, will be this size. If (@cb == NULL), @cb_chunk_size is ignored.
816 * If the stream is located in a WIM file, @flags can be set as documented in
817 * read_partial_wim_resource(). Otherwise @flags are ignored.
819 * Returns 0 on success; nonzero on error. A nonzero value will be returned if
820 * the stream data cannot be successfully read (for a number of different
821 * reasons, depending on the stream location), or if a callback function was
822 * specified and it returned nonzero.
/* Entry point for reading the first @size bytes of any stream: checks its
 * preconditions with wimlib_assert() and then calls the handler registered
 * for lte->resource_location, returning that handler's result. */
825 read_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size,
826 consume_data_callback_t cb, u32 cb_chunk_size,
827 void *ctx_or_buf, int flags)
829 /* This function merely verifies several preconditions, then passes
830 * control to an appropriate function for understanding each possible
831 * stream location. */
/* Dispatch table indexed by lte->resource_location.
 * RESOURCE_IN_FILE_ON_DISK appears twice below; presumably the two entries
 * sit in opposite branches of a platform conditional -- TODO confirm. */
832 static const read_stream_prefix_handler_t handlers[] = {
833 [RESOURCE_IN_WIM] = read_wim_stream_prefix,
835 [RESOURCE_IN_FILE_ON_DISK] = read_win32_file_prefix,
837 [RESOURCE_IN_FILE_ON_DISK] = read_file_on_disk_prefix,
839 [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix,
841 [RESOURCE_IN_STAGING_FILE] = read_file_on_disk_prefix,
844 [RESOURCE_IN_NTFS_VOLUME] = read_ntfs_file_prefix,
847 [RESOURCE_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix,
/* The location must index a non-NULL table slot, and a callback, when given,
 * requires a nonzero chunk size. */
850 wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
851 && handlers[lte->resource_location] != NULL);
852 wimlib_assert(cb == NULL || cb_chunk_size > 0);
853 return handlers[lte->resource_location](lte, size, cb, cb_chunk_size,
857 /* Read the full uncompressed data of the specified stream into the specified
858 * buffer, which must have space for at least lte->size bytes. */
/* Implemented as read_stream_prefix() over all lte->size bytes with no
 * callback (cb == NULL, cb_chunk_size == 0) and no flags; its result is
 * returned. */
860 read_full_stream_into_buf(const struct wim_lookup_table_entry *lte, void *buf)
862 return read_stream_prefix(lte, lte->size, NULL, 0, buf, 0);
865 /* Read the full uncompressed data of the specified stream. A buffer sufficient
866 * to hold the data is allocated and returned in @buf_ret. */
/* WIMLIB_ERR_NOMEM is returned both when lte->size cannot be represented as
 * a size_t and on the allocation-failure return below.  The buffer is
 * obtained with MALLOC(), so the caller presumably releases it with FREE()
 * -- TODO confirm. */
868 read_full_stream_into_alloc_buf(const struct wim_lookup_table_entry *lte,
/* Round-tripping through size_t detects sizes too large for this platform's
 * allocator before MALLOC() is called. */
874 if ((size_t)lte->size != lte->size) {
875 ERROR("Can't read %"PRIu64" byte stream into "
876 "memory", lte->size);
877 return WIMLIB_ERR_NOMEM;
880 buf = MALLOC(lte->size);
882 return WIMLIB_ERR_NOMEM;
884 ret = read_full_stream_into_buf(lte, buf);
894 /* Retrieve the full uncompressed data of the specified WIM resource. */
/* A temporary lookup table entry is created, bound to @rspec and set up to
 * cover the whole resource (size == rspec->uncompressed_size,
 * offset_in_res == 0).  The data is then read into a newly allocated buffer
 * returned through @buf_ret, and the temporary entry is unbound and freed
 * again.  WIMLIB_ERR_NOMEM is returned on the early-exit path after
 * new_lookup_table_entry(). */
896 wim_resource_spec_to_data(struct wim_resource_spec *rspec, void **buf_ret)
899 struct wim_lookup_table_entry *lte;
901 lte = new_lookup_table_entry();
903 return WIMLIB_ERR_NOMEM;
906 lte_bind_wim_resource_spec(lte, rspec);
907 lte->flags = rspec->flags;
908 lte->size = rspec->uncompressed_size;
909 lte->offset_in_res = 0;
911 ret = read_full_stream_into_alloc_buf(lte, buf_ret);
/* Unbind before freeing the temporary entry; @rspec is owned by the caller. */
913 lte_unbind_wim_resource_spec(lte);
914 free_lookup_table_entry(lte);
918 /* Retrieve the full uncompressed data of the specified WIM resource. */
/* Variant that starts from a resource header: @reshdr is expanded into a
 * local struct wim_resource_spec for @wim via wim_res_hdr_to_spec(), and the
 * work is delegated to wim_resource_spec_to_data(), whose result is
 * returned. */
920 wim_reshdr_to_data(const struct wim_reshdr *reshdr, WIMStruct *wim, void **buf_ret)
922 DEBUG("offset_in_wim=%"PRIu64", size_in_wim=%"PRIu64", "
923 "uncompressed_size=%"PRIu64,
924 reshdr->offset_in_wim, reshdr->size_in_wim, reshdr->uncompressed_size);
926 struct wim_resource_spec rspec;
927 wim_res_hdr_to_spec(reshdr, wim, &rspec);
928 return wim_resource_spec_to_data(&rspec, buf_ret);
/* Members of the context handed to extract_chunk_sha1_wrapper(): the real
 * consumer callback and the argument to pass to it.  That wrapper also uses
 * a sha_ctx member of the same structure. */
933 consume_data_callback_t extract_chunk;
934 void *extract_chunk_arg;
/* Callback adapter used by extract_stream(): folds each chunk into the
 * running SHA1 state in the context, then forwards the same chunk to the
 * wrapped callback and returns that callback's result.  @_ctx points to a
 * struct extract_ctx. */
938 extract_chunk_sha1_wrapper(const void *chunk, size_t chunk_size, void *_ctx)
940 struct extract_ctx *ctx = _ctx;
942 sha1_update(&ctx->sha_ctx, chunk, chunk_size);
943 return ctx->extract_chunk(chunk, chunk_size, ctx->extract_chunk_arg);
946 /* Extracts the first @size bytes of a stream to somewhere. In the process, the
947 * SHA1 message digest of the uncompressed stream is checked if the full stream
948 * is being extracted.
950 * @extract_chunk is a function that will be called to extract each chunk of the
951 * stream. */
/* When the whole stream is requested (@size == lte->size), the data is routed
 * through extract_chunk_sha1_wrapper() so that a SHA1 digest can be computed
 * and compared with lte->hash; a mismatch sets
 * WIMLIB_ERR_INVALID_RESOURCE_HASH.  For a shorter prefix the data goes
 * straight to @extract_chunk and no digest is checked.  In both cases
 * lte_cchunk_size(lte) is used as the callback chunk size. */
953 extract_stream(const struct wim_lookup_table_entry *lte, u64 size,
954 consume_data_callback_t extract_chunk, void *extract_chunk_arg)
957 if (size == lte->size) {
959 struct extract_ctx ctx;
960 ctx.extract_chunk = extract_chunk;
961 ctx.extract_chunk_arg = extract_chunk_arg;
962 sha1_init(&ctx.sha_ctx);
963 ret = read_stream_prefix(lte, size,
964 extract_chunk_sha1_wrapper,
965 lte_cchunk_size(lte),
968 u8 hash[SHA1_HASH_SIZE];
969 sha1_final(hash, &ctx.sha_ctx);
970 if (!hashes_equal(hash, lte->hash)) {
/* Details are printed only when error output is enabled; the error code
 * below is set regardless. */
971 if (wimlib_print_errors) {
972 ERROR("Invalid SHA1 message digest "
973 "on the following WIM stream:");
974 print_lookup_table_entry(lte, stderr);
975 if (lte->resource_location == RESOURCE_IN_WIM)
976 ERROR("The WIM file appears to be corrupt!");
978 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
983 ret = read_stream_prefix(lte, size, extract_chunk,
984 lte_cchunk_size(lte),
985 extract_chunk_arg, 0);
/* Data callback that writes @len bytes from @buf to the struct filedes that
 * @_fd_p points to, using full_write().  The error message is presumably
 * emitted only when full_write() fails -- TODO confirm. */
991 extract_wim_chunk_to_fd(const void *buf, size_t len, void *_fd_p)
993 struct filedes *fd = _fd_p;
994 int ret = full_write(fd, buf, len);
996 ERROR_WITH_ERRNO("Error writing to file descriptor");
1000 /* Extract the first @size bytes of the specified stream to the specified file
1001 * descriptor. If @size is the full size of the stream, its SHA1 message digest
1002 * is also checked. */
/* Thin wrapper: calls extract_stream() with extract_wim_chunk_to_fd() as the
 * chunk consumer and @fd as its argument, and returns its result. */
1004 extract_stream_to_fd(const struct wim_lookup_table_entry *lte,
1005 struct filedes *fd, u64 size)
1007 return extract_stream(lte, size, extract_wim_chunk_to_fd, fd);
/* Data callback used by sha1_stream(): folds @len bytes at @buf into the
 * SHA1 state passed as @ctx. */
1012 sha1_chunk(const void *buf, size_t len, void *ctx)
1014 sha1_update(ctx, buf, len);
1018 /* Calculate the SHA1 message digest of a stream, storing it in @lte->hash. */
/* The whole stream (lte->size bytes) is read with read_stream_prefix(),
 * using sha1_chunk() as the consumer and lte_cchunk_size(lte) as the callback
 * chunk size.  Presumably the digest is finalised only if the read
 * succeeded -- TODO confirm. */
1020 sha1_stream(struct wim_lookup_table_entry *lte)
1025 sha1_init(&sha_ctx);
1026 ret = read_stream_prefix(lte, lte->size,
1027 sha1_chunk, lte_cchunk_size(lte),
1030 sha1_final(lte->hash, &sha_ctx);
1035 /* Convert a WIM resource header to a stand-alone resource specification. */
/* Offsets, sizes and flags are copied from @reshdr.  The pipable property
 * comes from @wim, and the list head spec->lte_list is initialised empty. */
1037 wim_res_hdr_to_spec(const struct wim_reshdr *reshdr, WIMStruct *wim,
1038 struct wim_resource_spec *spec)
1041 spec->offset_in_wim = reshdr->offset_in_wim;
1042 spec->size_in_wim = reshdr->size_in_wim;
1043 spec->uncompressed_size = reshdr->uncompressed_size;
1044 INIT_LIST_HEAD(&spec->lte_list);
1045 spec->flags = reshdr->flags;
1046 spec->is_pipable = wim_is_pipable(wim);
/* Resources flagged COMPRESSED or CONCAT inherit the WIM's compression type
 * and chunk size; all others are marked uncompressed with a chunk size
 * of 0. */
1047 if (spec->flags & (WIM_RESHDR_FLAG_COMPRESSED | WIM_RESHDR_FLAG_CONCAT)) {
1048 spec->ctype = wim->compression_type;
1049 spec->cchunk_size = wim->chunk_size;
1051 spec->ctype = WIMLIB_COMPRESSION_TYPE_NONE;
1052 spec->cchunk_size = 0;
1056 /* Convert a stand-alone resource specification to a WIM resource header. */
/* Only the four header fields are copied: offset_in_wim, size_in_wim, flags
 * and uncompressed_size. */
1058 wim_res_spec_to_hdr(const struct wim_resource_spec *rspec,
1059 struct wim_reshdr *reshdr)
1061 reshdr->offset_in_wim = rspec->offset_in_wim;
1062 reshdr->size_in_wim = rspec->size_in_wim;
1063 reshdr->flags = rspec->flags;
1064 reshdr->uncompressed_size = rspec->uncompressed_size;
1067 /* Translates a WIM resource header from the on-disk format into an in-memory
1068 * format. */
/* Decodes @disk_reshdr into @reshdr.  The 64-bit fields are converted from
 * little-endian, and size_in_wim is reassembled from seven separate bytes,
 * least significant first.  WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY is returned
 * when offset_in_wim or uncompressed_size has either of its two top bits
 * set. */
1070 get_wim_reshdr(const struct wim_reshdr_disk *disk_reshdr,
1071 struct wim_reshdr *reshdr)
1073 reshdr->offset_in_wim = le64_to_cpu(disk_reshdr->offset_in_wim);
/* Each byte is widened to u64 before shifting so that shifts of 32 bits and
 * more are well defined. */
1074 reshdr->size_in_wim = (((u64)disk_reshdr->size_in_wim[0] << 0) |
1075 ((u64)disk_reshdr->size_in_wim[1] << 8) |
1076 ((u64)disk_reshdr->size_in_wim[2] << 16) |
1077 ((u64)disk_reshdr->size_in_wim[3] << 24) |
1078 ((u64)disk_reshdr->size_in_wim[4] << 32) |
1079 ((u64)disk_reshdr->size_in_wim[5] << 40) |
1080 ((u64)disk_reshdr->size_in_wim[6] << 48));
1081 reshdr->uncompressed_size = le64_to_cpu(disk_reshdr->uncompressed_size);
1082 reshdr->flags = disk_reshdr->flags;
1084 /* Reject (rather than truncate) values that need more than 62 bits, to avoid possible overflows. */
1085 if (reshdr->offset_in_wim & 0xc000000000000000ULL)
1086 return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
1088 if (reshdr->uncompressed_size & 0xc000000000000000ULL)
1089 return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
1094 /* Translates a WIM resource header from an in-memory format into the on-disk
1095 * format. */
/* Encodes @reshdr into @disk_reshdr; this is the inverse of the byte layout
 * decoded by get_wim_reshdr().  size_in_wim is written as seven bytes, least
 * significant first, so only its low 56 bits are stored.  This assumes each
 * size_in_wim[] element is a single byte -- TODO confirm in the struct
 * definition.  The two 64-bit fields are stored little-endian. */
1097 put_wim_reshdr(const struct wim_reshdr *reshdr,
1098 struct wim_reshdr_disk *disk_reshdr)
1100 disk_reshdr->size_in_wim[0] = reshdr->size_in_wim >> 0;
1101 disk_reshdr->size_in_wim[1] = reshdr->size_in_wim >> 8;
1102 disk_reshdr->size_in_wim[2] = reshdr->size_in_wim >> 16;
1103 disk_reshdr->size_in_wim[3] = reshdr->size_in_wim >> 24;
1104 disk_reshdr->size_in_wim[4] = reshdr->size_in_wim >> 32;
1105 disk_reshdr->size_in_wim[5] = reshdr->size_in_wim >> 40;
1106 disk_reshdr->size_in_wim[6] = reshdr->size_in_wim >> 48;
1107 disk_reshdr->flags = reshdr->flags;
1108 disk_reshdr->offset_in_wim = cpu_to_le64(reshdr->offset_in_wim);
1109 disk_reshdr->uncompressed_size = cpu_to_le64(reshdr->uncompressed_size);