4 * Read uncompressed and compressed metadata and file resources from a WIM file.
8 * Copyright (C) 2012, 2013 Eric Biggers
10 * This file is part of wimlib, a library for working with WIM files.
12 * wimlib is free software; you can redistribute it and/or modify it under the
13 * terms of the GNU General Public License as published by the Free Software
14 * Foundation; either version 3 of the License, or (at your option) any later
17 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License along with
22 * wimlib; if not, see http://www.gnu.org/licenses/.
30 #include "wimlib/dentry.h"
31 #include "wimlib/endianness.h"
32 #include "wimlib/error.h"
33 #include "wimlib/file_io.h"
34 #include "wimlib/lookup_table.h"
35 #include "wimlib/resource.h"
36 #include "wimlib/sha1.h"
39 /* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */
40 # include "wimlib/win32.h"
44 /* for read_ntfs_file_prefix() */
45 # include "wimlib/ntfs_3g.h"
58 * Compressed resources
60 * A compressed resource in a WIM consists of a number of consecutive LZX or
61 * XPRESS-compressed chunks, each of which decompresses to 32768 bytes of data,
62 * except possibly the last, which always decompresses to any remaining bytes.
63 * In addition, immediately before the chunks, a table (the "chunk table")
64 * provides the offset, in bytes relative to the end of the chunk table, of the
65 * start of each compressed chunk, except for the first chunk which is omitted
66 * as it always has an offset of 0. Therefore, a compressed resource with N
67 * chunks will have a chunk table with N - 1 entries.
69 * Additional information:
71 * - Entries in the chunk table are 4 bytes each, except if the uncompressed
72 * size of the resource is greater than 4 GiB, in which case the entries in
73 * the chunk table are 8 bytes each. In either case, the entries are unsigned
74 * little-endian integers.
76 * - The chunk table is included in the compressed size of the resource provided
77 * in the corresponding entry in the WIM's stream lookup table.
79 * - The compressed size of a chunk is never greater than the uncompressed size.
80 * From the compressor's point of view, chunks that would have compressed to a
81 * size greater than or equal to their original size are in fact stored
82 * uncompressed. From the decompressor's point of view, chunks with
83 * compressed size equal to their uncompressed size are in fact uncompressed.
85 * Furthermore, wimlib supports its own "pipable" WIM format, and for this the
86 * structure of compressed resources was modified to allow piped reading and
87 * writing. To make sequential writing possible, the chunk table is placed
88 * after the chunks rather than before the chunks, and to make sequential
89 * reading possible, each chunk is prefixed with a 4-byte header giving its
90 * compressed size as a 32-bit, unsigned, little-endian integer (less than or
91 * equal to 32768). Otherwise the details are the same.
94 static int decompress(const void *cchunk, unsigned clen,
95 void *uchunk, unsigned ulen,
96 int ctype, u32 wim_chunk_size)
99 case WIMLIB_COMPRESSION_TYPE_XPRESS:
100 return wimlib_xpress_decompress(cchunk,
104 case WIMLIB_COMPRESSION_TYPE_LZX:
105 return wimlib_lzx_decompress2(cchunk,
117 * read_compressed_resource()-
119 * Read data from a compressed resource being read from a seekable WIM file.
120 * The resource may be either pipable or non-pipable.
125 * Just do a normal read, decompressing the data if necessary.
127 * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS:
128 * Read the raw contents of the compressed chunks of the compressed
129 * resource. For pipable resources, this does *not* include the chunk
130 * headers. If a callback function is being used, it will be called once
131 * for each compressed chunk. For non-pipable resources, this mode
132 * excludes the chunk table. For pipable resources, this mode excludes the
133 * stream and chunk headers.
136 read_compressed_resource(const struct wim_lookup_table_entry * const lte,
137 u64 size, const consume_data_callback_t cb,
138 const u32 in_chunk_size, void * const ctx_or_buf,
139 const int flags, const u64 offset)
143 const u32 orig_chunk_size = wim_resource_chunk_size(lte);
144 const u32 orig_chunk_order = bsr32(orig_chunk_size);
146 wimlib_assert(is_power_of_2(orig_chunk_size));
147 wimlib_assert(cb == NULL || is_power_of_2(in_chunk_size));
149 /* Currently, reading raw compressed chunks is only guaranteed to work
150 * correctly when the full resource is requested. Furthermore, in such
151 * cases the requested size is specified as the compressed size, but
152 * here we change it to an uncompressed size to avoid confusing the rest
153 * of this function. */
154 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
155 wimlib_assert(offset == 0);
156 wimlib_assert(size == lte->resource_entry.size);
157 wimlib_assert(wim_resource_chunk_size(lte) == in_chunk_size);
158 size = wim_resource_size(lte);
161 wimlib_assert(offset + size <= wim_resource_size(lte));
163 /* Handle the trivial case. */
167 u64 *chunk_offsets = NULL;
170 void *compressed_buf = NULL;
171 bool chunk_offsets_malloced = false;
172 bool out_buf_malloced = false;
173 bool tmp_buf_malloced = false;
174 bool compressed_buf_malloced = false;
175 const size_t stack_max = 32768;
177 /* Get the file descriptor for the WIM. */
178 struct filedes * const in_fd = <e->wim->in_fd;
180 /* Calculate the number of chunks the resource is divided into. */
181 const u64 num_chunks = wim_resource_chunks(lte);
183 /* Calculate the number of entries in the chunk table; it's one less
184 * than the number of chunks, since the first chunk has no entry. */
185 const u64 num_chunk_entries = num_chunks - 1;
187 /* Calculate the 0-based index of the chunk at which the read starts.
189 const u64 start_chunk = offset >> orig_chunk_order;
191 /* Calculate the offset, within the start chunk, of the first byte of
193 const u32 start_offset_in_chunk = offset & (orig_chunk_size - 1);
195 /* Calculate the index of the chunk that contains the last byte of the
197 const u64 end_chunk = (offset + size - 1) >> orig_chunk_order;
199 /* Calculate the offset, within the end chunk, of the last byte of the
201 const u32 end_offset_in_chunk = (offset + size - 1) & (orig_chunk_size - 1);
203 /* Calculate the number of chunk entries that are actually needed to read the
204 * requested part of the resource. Include an entry for the first chunk
205 * even though that doesn't exist in the on-disk table, but take into
206 * account that if the last chunk required for the read is not the last
207 * chunk of the resource, an extra chunk entry is needed so that the
208 * compressed size of the last chunk of the read can be determined. */
209 const u64 num_alloc_chunk_entries = end_chunk - start_chunk +
210 1 + (end_chunk != num_chunks - 1);
212 /* Set the size of each chunk table entry based on the resource's
213 * uncompressed size. */
214 const u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
216 /* Calculate the size, in bytes, of the full chunk table. */
217 const u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
219 /* Allocate a buffer to hold a subset of the chunk table. It will only
220 * contain offsets for the chunks that are actually needed for this
221 * read. For speed, allocate the buffer on the stack unless it's too
223 if (num_alloc_chunk_entries <= stack_max) {
224 chunk_offsets = alloca(num_alloc_chunk_entries * sizeof(u64));
225 chunk_offsets_malloced = false;
227 chunk_offsets = MALLOC(num_alloc_chunk_entries * sizeof(u64));
228 if (!chunk_offsets) {
229 ERROR("Failed to allocate chunk table "
230 "with %"PRIu64" entries", num_alloc_chunk_entries);
231 return WIMLIB_ERR_NOMEM;
233 chunk_offsets_malloced = true;
236 /* Set the implicit offset of the first chunk if it's included in the
238 if (start_chunk == 0)
239 chunk_offsets[0] = 0;
241 /* Calculate the index of the first needed entry in the chunk table. */
242 const u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
244 /* Calculate the number of entries that need to be read from the chunk
246 const u64 num_needed_chunk_entries = (start_chunk == 0) ?
247 num_alloc_chunk_entries - 1 : num_alloc_chunk_entries;
249 /* Calculate the number of bytes of data that need to be read from the
251 const size_t chunk_table_needed_size =
252 num_needed_chunk_entries * chunk_entry_size;
253 if ((u64)chunk_table_needed_size !=
254 num_needed_chunk_entries * chunk_entry_size)
256 ERROR("Compressed read request too large to fit into memory!");
257 ret = WIMLIB_ERR_NOMEM;
258 goto out_free_memory;
261 /* Calculate the byte offset, in the WIM file, of the first chunk table
262 * entry to read. Take into account that if the WIM file is in the
263 * special "pipable" format, then the chunk table is at the end of the
264 * resource, not the beginning. */
265 const u64 file_offset_of_needed_chunk_entries =
266 lte->resource_entry.offset
267 + (start_table_idx * chunk_entry_size)
268 + (lte->is_pipable ? (lte->resource_entry.size - chunk_table_size) : 0);
270 /* Read the needed chunk table entries into the end of the chunk_offsets
272 void * const chunk_tab_data = (u8*)&chunk_offsets[num_alloc_chunk_entries] -
273 chunk_table_needed_size;
274 ret = full_pread(in_fd, chunk_tab_data, chunk_table_needed_size,
275 file_offset_of_needed_chunk_entries);
279 /* Now fill in chunk_offsets from the entries we have read in
280 * chunk_tab_data. Careful: chunk_offsets aliases chunk_tab_data, which
281 * breaks C's aliasing rules when we read 32-bit integers and store
282 * 64-bit integers. But since the operations are safe as long as the
283 * compiler doesn't mess with their order, we use the gcc may_alias
284 * extension to tell the compiler that loads from the 32-bit integers
285 * may alias stores to the 64-bit integers. */
287 typedef le64 __attribute__((may_alias)) aliased_le64_t;
288 typedef le32 __attribute__((may_alias)) aliased_le32_t;
289 u64 * const chunk_offsets_p = chunk_offsets + (start_chunk == 0);
292 if (chunk_entry_size == 4) {
293 aliased_le32_t *raw_entries = (aliased_le32_t*)chunk_tab_data;
294 for (i = 0; i < num_needed_chunk_entries; i++)
295 chunk_offsets_p[i] = le32_to_cpu(raw_entries[i]);
297 aliased_le64_t *raw_entries = (aliased_le64_t*)chunk_tab_data;
298 for (i = 0; i < num_needed_chunk_entries; i++)
299 chunk_offsets_p[i] = le64_to_cpu(raw_entries[i]);
303 /* Calculate file offset of the first chunk that needs to be read.
304 * Note: if the resource is pipable, the entries in the chunk table do
305 * *not* include the chunk headers. */
306 u64 cur_read_offset = lte->resource_entry.offset + chunk_offsets[0];
307 if (!lte->is_pipable)
308 cur_read_offset += chunk_table_size;
310 cur_read_offset += start_chunk * sizeof(struct pwm_chunk_hdr);
312 /* If using a callback function, allocate a temporary buffer that will
313 * be used to pass data to it. If writing directly to a buffer instead,
314 * arrange to write data directly into it. */
316 u8 *out_buf_end, *out_p;
318 out_buf_size = max(in_chunk_size, orig_chunk_size);
319 if (out_buf_size <= stack_max) {
320 out_buf = alloca(out_buf_size);
322 out_buf = MALLOC(out_buf_size);
323 if (out_buf == NULL) {
324 ret = WIMLIB_ERR_NOMEM;
325 goto out_free_memory;
327 out_buf_malloced = true;
331 out_buf = ctx_or_buf;
333 out_buf_end = out_buf + out_buf_size;
336 /* Unless the raw compressed data was requested, allocate a temporary
337 * buffer for reading compressed chunks, each of which can be at most
338 * orig_chunk_size - 1 bytes. This excludes compressed chunks that are
339 * a full orig_chunk_size bytes, which are actually stored uncompressed.
341 if (!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS)) {
342 if (orig_chunk_size - 1 <= stack_max) {
343 compressed_buf = alloca(orig_chunk_size - 1);
345 compressed_buf = MALLOC(orig_chunk_size - 1);
346 if (compressed_buf == NULL) {
347 ret = WIMLIB_ERR_NOMEM;
348 goto out_free_memory;
350 compressed_buf_malloced = true;
354 /* Allocate yet another temporary buffer, this one for reading partial
356 if (start_offset_in_chunk != 0 ||
357 (end_offset_in_chunk != orig_chunk_size - 1 &&
358 offset + size != wim_resource_size(lte)))
360 if (orig_chunk_size <= stack_max) {
361 tmp_buf = alloca(orig_chunk_size);
363 tmp_buf = MALLOC(orig_chunk_size);
364 if (tmp_buf == NULL) {
365 ret = WIMLIB_ERR_NOMEM;
366 goto out_free_memory;
368 tmp_buf_malloced = true;
372 /* Read, and possibly decompress, each needed chunk, either writing the
373 * data directly into the @ctx_or_buf buffer or passing it to the @cb
374 * callback function. */
375 for (u64 i = start_chunk; i <= end_chunk; i++) {
377 /* If the resource is pipable, skip the chunk header. */
379 cur_read_offset += sizeof(struct pwm_chunk_hdr);
381 /* Calculate the sizes of the compressed chunk and of the
382 * uncompressed chunk. */
383 u32 compressed_chunk_size;
384 u32 uncompressed_chunk_size;
385 if (i != num_chunks - 1) {
386 /* Not the last chunk. Compressed size is given by
387 * difference of chunk table entries; uncompressed size
388 * is always the WIM chunk size. */
389 compressed_chunk_size = chunk_offsets[i + 1 - start_chunk] -
390 chunk_offsets[i - start_chunk];
391 uncompressed_chunk_size = orig_chunk_size;
393 /* Last chunk. Compressed size is the remaining size in
394 * the compressed resource; uncompressed size is the
395 * remaining size in the uncompressed resource. */
396 compressed_chunk_size = lte->resource_entry.size -
398 chunk_offsets[i - start_chunk];
400 compressed_chunk_size -= num_chunks *
401 sizeof(struct pwm_chunk_hdr);
403 if ((wim_resource_size(lte) & (orig_chunk_size - 1)) == 0)
404 uncompressed_chunk_size = orig_chunk_size;
406 uncompressed_chunk_size = wim_resource_size(lte) &
407 (orig_chunk_size - 1);
410 /* Calculate how much of this chunk needs to be read. */
412 u32 partial_chunk_size;
413 u32 start_offset = 0;
414 u32 end_offset = orig_chunk_size - 1;
416 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
417 partial_chunk_size = compressed_chunk_size;
419 if (i == start_chunk)
420 start_offset = start_offset_in_chunk;
423 end_offset = end_offset_in_chunk;
425 partial_chunk_size = end_offset + 1 - start_offset;
428 if (compressed_chunk_size == uncompressed_chunk_size ||
429 (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS))
431 /* Chunk stored uncompressed, or reading raw chunk data. */
432 ret = full_pread(in_fd,
435 cur_read_offset + start_offset);
439 /* Compressed chunk and not doing raw read. */
442 /* Read the compressed data into compressed_buf. */
443 ret = full_pread(in_fd,
445 compressed_chunk_size,
450 /* For partial chunks we must buffer the uncompressed
451 * data because we don't need all of it. */
452 if (partial_chunk_size == uncompressed_chunk_size)
457 /* Decompress the chunk. */
458 ret = decompress(compressed_buf,
459 compressed_chunk_size,
461 uncompressed_chunk_size,
462 wim_resource_compression_type(lte),
465 ERROR("Failed to decompress data.");
466 ret = WIMLIB_ERR_DECOMPRESSION;
468 goto out_free_memory;
470 if (partial_chunk_size != uncompressed_chunk_size)
471 memcpy(out_p, tmp_buf + start_offset,
475 out_p += partial_chunk_size;
478 /* Feed the data to the callback function. */
479 wimlib_assert(offset == 0);
481 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
482 ret = cb(out_buf, out_p - out_buf, ctx_or_buf);
484 goto out_free_memory;
487 } else if (i == end_chunk || out_p == out_buf_end) {
491 for (p = out_buf; p != out_p; p += bytes_sent) {
492 bytes_sent = min(in_chunk_size, out_p - p);
493 ret = cb(p, bytes_sent, ctx_or_buf);
495 goto out_free_memory;
500 cur_read_offset += compressed_chunk_size;
505 if (chunk_offsets_malloced)
507 if (out_buf_malloced)
509 if (compressed_buf_malloced)
510 FREE(compressed_buf);
511 if (tmp_buf_malloced)
516 ERROR_WITH_ERRNO("Error reading compressed file resource");
517 goto out_free_memory;
520 /* Skip over the chunk table at the end of a pipable, compressed resource being
521 * read from a pipe. */
/* skip_chunk_table(): move past the chunk table that trails a pipable,
 * compressed resource.
 *
 * @lte:   lookup table entry of the resource; its chunk count and
 *         uncompressed size determine how large the table is.
 * @in_fd: input descriptor; the table is assumed to begin at in_fd->offset. */
523 skip_chunk_table(const struct wim_lookup_table_entry *lte,
524 struct filedes *in_fd)
/* One table entry per chunk, except the first chunk, which has none. */
526 u64 num_chunk_entries = wim_resource_chunks(lte) - 1;
/* Entries are 8 bytes wide when the uncompressed size exceeds 4 GiB,
 * otherwise 4 bytes. */
527 u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
528 u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
/* A single-chunk resource has an empty table, so there is nothing to skip. */
531 if (num_chunk_entries != 0) {
/* Read only the final byte of the table.  NOTE(review): presumably
 * full_pread() on a non-seekable descriptor consumes the bytes in between,
 * which is what performs the skip; confirm in the file I/O layer. */
533 ret = full_pread(in_fd, &dummy, 1,
534 in_fd->offset + chunk_table_size - 1);
541 /* Read and decompress data from a compressed, pipable resource being read from
544 read_pipable_resource(const struct wim_lookup_table_entry *lte,
545 u64 size, consume_data_callback_t cb,
546 u32 in_chunk_size, void *ctx_or_buf,
547 int flags, u64 offset)
549 struct filedes *in_fd;
551 const u32 orig_chunk_size = wim_resource_chunk_size(lte);
552 u8 cchunk[orig_chunk_size - 1];
555 u8 *out_buf, *out_buf_end, *out_p;
557 out_buf_size = max(in_chunk_size, orig_chunk_size);
558 out_buf = alloca(out_buf_size);
561 out_buf = ctx_or_buf;
563 out_buf_end = out_buf + out_buf_size;
566 /* Get pointers to appropriate decompression function and the input file
568 in_fd = <e->wim->in_fd;
570 /* This function currently assumes the entire resource is being read at
571 * once and that the raw compressed data isn't being requested. This is
572 * based on the fact that this function currently only gets called
573 * during the operation of wimlib_extract_image_from_pipe(). */
574 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW));
575 wimlib_assert(offset == 0);
576 wimlib_assert(size == wim_resource_size(lte));
577 wimlib_assert(in_fd->offset == lte->resource_entry.offset);
580 for (offset = 0; offset < size; offset += chunk_usize) {
581 struct pwm_chunk_hdr chunk_hdr;
584 /* Calculate uncompressed size of next chunk. */
585 chunk_usize = min(orig_chunk_size, size - offset);
587 /* Read the compressed size of the next chunk from the chunk
589 ret = full_read(in_fd, &chunk_hdr, sizeof(chunk_hdr));
593 chunk_csize = le32_to_cpu(chunk_hdr.compressed_size);
595 if (chunk_csize > orig_chunk_size) {
597 ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
601 /* Read chunk data. */
602 ret = full_read(in_fd, cchunk, chunk_csize);
606 if (flags & WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY)
609 /* Decompress chunk if needed. Uncompressed size same
610 * as compressed size means the chunk is uncompressed.
612 if (chunk_csize == chunk_usize) {
613 memcpy(out_p, cchunk, chunk_usize);
615 ret = (*decompress)(cchunk, chunk_csize,
617 wim_resource_compression_type(lte),
621 ret = WIMLIB_ERR_DECOMPRESSION;
625 out_p += chunk_usize;
627 /* Feed the uncompressed data into the callback function or copy
628 * it into the provided buffer. */
629 if (cb && (out_p == out_buf_end ||
630 offset + chunk_usize == size))
635 for (p = out_buf; p != out_p; p += bytes_sent) {
636 bytes_sent = min(in_chunk_size, out_p - p);
637 ret = cb(p, bytes_sent, ctx_or_buf);
645 ret = skip_chunk_table(lte, in_fd);
651 ERROR_WITH_ERRNO("Error reading compressed file resource");
655 ERROR("Compressed file resource is invalid");
660 * read_partial_wim_resource()-
662 * Read a range of data from an uncompressed or compressed resource in a WIM
663 * file. Data is written into a buffer or fed into a callback function, as
664 * documented in read_resource_prefix().
669 * Just do a normal read, decompressing the data if necessary. @size and
670 * @offset are interpreted relative to the uncompressed contents of the
673 * WIMLIB_READ_RESOURCE_FLAG_RAW_FULL:
674 * Only valid when the resource is compressed: Read the raw contents of
675 * the compressed resource. If the resource is non-pipable, this includes
676 * the chunk table as well as the compressed chunks. If the resource is
677 * pipable, this includes the compressed chunks--- including the chunk
678 * headers--- and the chunk table. The stream header is still *not*
681 * In this mode, @offset is relative to the beginning of the raw contents
682 * of the compressed resource--- that is, the chunk table if the resource
683 * is non-pipable, or the header for the first compressed chunk if the
684 * resource is pipable. @size is the number of raw bytes to read, which
685 * must not overrun the end of the resource. For example, if @offset is 0,
686 * then @size can be at most the raw size of the compressed resource
687 * (@lte->resource_entry.size).
689 * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS:
690 * Only valid when the resource is compressed and is not being read from a
691 * pipe: Read the raw contents of the compressed chunks of the compressed
692 * resource. For pipable resources, this does *not* include the chunk
693 * headers. If a callback function is being used, it will be called once
694 * for each compressed chunk. The chunk table is excluded. Also, for
695 * pipable resources, the stream and chunk headers are excluded. In this
696 * mode, @size must be exactly the raw size of the compressed resource
697 * (@lte->resource_entry.size) and @offset must be 0.
699 * WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY:
700 * Only valid when the resource is being read from a pipe: Skip over the
701 * requested data rather than feed it to the callback function or write it
702 * into the buffer. No decompression is done.
703 * WIMLIB_READ_RESOURCE_FLAG_RAW_* may not be combined with this flag.
704 * @offset must be 0 and @size must be the uncompressed size of the
708 * WIMLIB_ERR_SUCCESS (0)
709 * WIMLIB_ERR_READ (errno set)
710 * WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to 0)
711 * WIMLIB_ERR_NOMEM (errno set to ENOMEM)
712 * WIMLIB_ERR_DECOMPRESSION (errno set to EINVAL)
713 * WIMLIB_ERR_INVALID_PIPABLE_WIM (errno set to EINVAL)
715 * or other error code returned by the @cb function.
718 read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
719 u64 size, consume_data_callback_t cb,
721 void *ctx_or_buf, int flags, u64 offset)
723 struct filedes *in_fd;
726 /* Make sure the resource is actually located in a WIM file and is not
728 wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
730 /* If a callback was specified, in_chunk_size must be a power of 2 (and
732 wimlib_assert(cb == NULL || is_power_of_2(in_chunk_size));
734 /* If a callback was specified, offset must be zero. */
735 wimlib_assert(cb == NULL || offset == 0);
737 /* Retrieve input file descriptor for the WIM file. */
738 in_fd = <e->wim->in_fd;
740 /* Don't allow raw reads (either full or chunks) of uncompressed
742 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW) ||
743 resource_is_compressed(<e->resource_entry));
745 /* Don't allow seek-only reads unless reading from a pipe; also don't
746 * allow combining SEEK_ONLY with either RAW flag. */
747 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY) ||
748 (!filedes_is_seekable(in_fd) &&
749 !(flags & WIMLIB_READ_RESOURCE_FLAG_RAW)));
751 DEBUG("Reading WIM resource: %"PRIu64" @ +%"PRIu64" "
752 "from %"PRIu64" @ +%"PRIu64" (readflags 0x%08x, resflags 0x%02x%s)",
754 lte->resource_entry.original_size, lte->resource_entry.offset,
755 flags, lte->resource_entry.flags,
756 (lte->is_pipable ? ", pipable" : ""));
758 if ((flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) ||
759 !resource_is_compressed(<e->resource_entry))
761 /* Reading raw resource contents or reading uncompressed
763 wimlib_assert(offset + size <= lte->resource_entry.size);
764 offset += lte->resource_entry.offset;
765 if (flags & WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY) {
766 if (lte->resource_entry.size != 0) {
768 ret = full_pread(in_fd, &dummy, 1,
769 offset + lte->resource_entry.size - 1);
774 /* Send data to callback function */
775 u8 buf[min(in_chunk_size, size)];
777 size_t bytes_to_read = min(in_chunk_size, size);
778 ret = full_pread(in_fd, buf, bytes_to_read,
782 ret = cb(buf, bytes_to_read, ctx_or_buf);
785 size -= bytes_to_read;
786 offset += bytes_to_read;
789 /* Send data directly to a buffer */
790 ret = full_pread(in_fd, ctx_or_buf, size, offset);
795 } else if (lte->is_pipable && !filedes_is_seekable(in_fd)) {
796 /* Reading compressed, pipable resource from pipe. */
797 ret = read_pipable_resource(lte, size, cb,
799 ctx_or_buf, flags, offset);
801 /* Reading compressed, possibly pipable resource from seekable
803 ret = read_compressed_resource(lte, size, cb,
805 ctx_or_buf, flags, offset);
810 ERROR_WITH_ERRNO("Error reading data from WIM");
/* Convenience wrapper around read_partial_wim_resource(): read @size bytes,
 * starting at @offset, of the WIM resource @lte directly into @buf.
 *
 * No callback is used (cb == NULL, in_chunk_size == 0) and no read flags are
 * set, so this is a normal read of the resource contents.  The return value
 * is whatever read_partial_wim_resource() returns. */
817 read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
818 size_t size, u64 offset, void *buf)
820 return read_partial_wim_resource(lte, size, NULL, 0, buf, 0, offset);
824 read_wim_resource_prefix(const struct wim_lookup_table_entry *lte,
826 consume_data_callback_t cb,
831 return read_partial_wim_resource(lte, size, cb, in_chunk_size,
832 ctx_or_buf, flags, 0);
838 read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
840 consume_data_callback_t cb,
845 const tchar *filename = lte->file_on_disk;
850 bool out_buf_malloced;
851 const size_t stack_max = 32768;
853 DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"",
854 size, lte->file_on_disk);
856 raw_fd = open(filename, O_RDONLY);
858 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", filename);
859 return WIMLIB_ERR_OPEN;
861 filedes_init(&fd, raw_fd);
862 out_buf_malloced = false;
864 /* Send data to callback function */
865 if (in_chunk_size <= stack_max) {
866 out_buf = alloca(in_chunk_size);
868 out_buf = MALLOC(in_chunk_size);
869 if (out_buf == NULL) {
870 ret = WIMLIB_ERR_NOMEM;
873 out_buf_malloced = true;
876 size_t bytes_to_read;
878 bytes_to_read = min(in_chunk_size, size);
879 ret = full_read(&fd, out_buf, bytes_to_read);
882 ret = cb(out_buf, bytes_to_read, ctx_or_buf);
885 size -= bytes_to_read;
888 /* Send data directly to a buffer */
889 ret = full_read(&fd, ctx_or_buf, size);
897 ERROR_WITH_ERRNO("Error reading \"%"TS"\"", filename);
900 if (out_buf_malloced)
904 #endif /* !__WIN32__ */
/* read_buffer_prefix(): read the first @size bytes of a resource whose data
 * lives in memory at lte->attached_buffer.
 *
 * The data is either handed to @cb in pieces of at most in_chunk_size bytes
 * (with @ctx_or_buf passed through as the callback context) or copied in one
 * go into @ctx_or_buf.  NOTE(review): the test that selects between the two
 * paths is not shown here; presumably it is cb != NULL, matching the
 * read_resource_prefix() contract -- confirm.
 *
 * The flags argument is unused, as its name indicates. */
907 read_buffer_prefix(const struct wim_lookup_table_entry *lte,
908 u64 size, consume_data_callback_t cb,
910 void *ctx_or_buf, int _ignored_flags)
/* Callback path: walk the buffer in in_chunk_size steps; the last piece is
 * shortened to whatever remains. */
917 for (u64 offset = 0; offset < size; offset += chunk_size) {
918 chunk_size = min(in_chunk_size, size - offset);
919 ret = cb((const u8*)lte->attached_buffer + offset,
920 chunk_size, ctx_or_buf);
/* Buffer path: a single copy of the requested prefix. */
925 memcpy(ctx_or_buf, lte->attached_buffer, size);
930 typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
932 consume_data_callback_t cb,
938 * read_resource_prefix()-
940 * Read the first @size bytes from a generic "resource", which may be located in
941 * the WIM (compressed or uncompressed), in an external file, or directly in an
944 * Feed the data either to a callback function (cb != NULL, passing it
945 * ctx_or_buf), or write it directly into a buffer (cb == NULL, ctx_or_buf
946 * specifies the buffer, which must have room for @size bytes).
948 * When using a callback function, it is called with chunks up to 32768 bytes in
949 * size until the resource is exhausted.
951 * If the resource is located in a WIM file, @flags can be set as documented in
952 * read_partial_wim_resource(). Otherwise @flags are ignored.
955 read_resource_prefix(const struct wim_lookup_table_entry *lte,
956 u64 size, consume_data_callback_t cb, u32 in_chunk_size,
957 void *ctx_or_buf, int flags)
959 static const read_resource_prefix_handler_t handlers[] = {
960 [RESOURCE_IN_WIM] = read_wim_resource_prefix,
962 [RESOURCE_IN_FILE_ON_DISK] = read_win32_file_prefix,
964 [RESOURCE_IN_FILE_ON_DISK] = read_file_on_disk_prefix,
966 [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix,
968 [RESOURCE_IN_STAGING_FILE] = read_file_on_disk_prefix,
971 [RESOURCE_IN_NTFS_VOLUME] = read_ntfs_file_prefix,
974 [RESOURCE_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix,
977 wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
978 && handlers[lte->resource_location] != NULL);
979 wimlib_assert(cb == NULL || in_chunk_size > 0);
980 return handlers[lte->resource_location](lte, size, cb, in_chunk_size, ctx_or_buf, flags);
/* Read the entire resource @lte (wim_resource_size(lte) bytes) into the
 * caller-supplied buffer.  No callback and no flags are passed to
 * read_resource_prefix(), whose contract requires the buffer to have room for
 * the full size. */
984 read_full_resource_into_buf(const struct wim_lookup_table_entry *lte,
987 return read_resource_prefix(lte, wim_resource_size(lte), NULL, 0, buf, 0);
/* read_full_resource_into_alloc_buf(): allocate a buffer of
 * lte->resource_entry.original_size bytes and read the full resource into it.
 *
 * Returns WIMLIB_ERR_NOMEM when the size cannot be represented as a size_t or
 * when the allocation fails; otherwise the result comes from
 * read_full_resource_into_buf().  NOTE(review): how the buffer is handed back
 * to the caller, and whether it is freed on a read error, is not shown in the
 * lines below -- confirm before relying on ownership. */
991 read_full_resource_into_alloc_buf(const struct wim_lookup_table_entry *lte,
/* Round-trip the 64-bit size through size_t; a mismatch means the resource is
 * too large to address in memory on this platform. */
997 if ((size_t)lte->resource_entry.original_size !=
998 lte->resource_entry.original_size)
1000 ERROR("Can't read %"PRIu64" byte resource into "
1001 "memory", lte->resource_entry.original_size);
1002 return WIMLIB_ERR_NOMEM;
/* Allocate room for the whole uncompressed resource. */
1005 buf = MALLOC(lte->resource_entry.original_size);
1007 return WIMLIB_ERR_NOMEM;
/* Fill the new buffer with the resource contents. */
1009 ret = read_full_resource_into_buf(lte, buf);
/* res_entry_to_data(): read the resource described by a bare resource entry.
 *
 * @res_entry: location and size of the resource.
 * @wim:       WIM the resource belongs to; also supplies the part number.
 * @buf_ret:   passed on to read_full_resource_into_alloc_buf(), which
 *             allocates the buffer that receives the data.
 *
 * A temporary lookup table entry is built so the generic read path can be
 * reused; it is freed again once the read has been attempted.  Returns
 * WIMLIB_ERR_NOMEM when the temporary entry cannot be allocated. */
1020 res_entry_to_data(const struct resource_entry *res_entry,
1021 WIMStruct *wim, void **buf_ret)
1024 struct wim_lookup_table_entry *lte;
1026 lte = new_lookup_table_entry();
1028 return WIMLIB_ERR_NOMEM;
/* NOTE(review): the first argument on the next line looks like a mis-decoded
 * "&lte->resource_entry" -- confirm against the upstream source. */
1030 copy_resource_entry(<e->resource_entry, res_entry);
/* Bind the temporary entry to this WIM so that it can be read from it. */
1032 lte->part_number = wim->hdr.part_number;
1033 lte_init_wim(lte, wim);
1035 ret = read_full_resource_into_alloc_buf(lte, buf_ret);
/* The temporary entry is released after the read, before returning. */
1036 free_lookup_table_entry(lte);
/* Context handed to extract_chunk_sha1_wrapper(): the caller's real chunk
 * callback together with its argument.  The wrapper also uses a sha_ctx
 * member of this structure to accumulate the SHA1 state. */
1040 struct extract_ctx {
/* Callback that actually consumes each chunk. */
1042 consume_data_callback_t extract_chunk;
/* Opaque argument forwarded to extract_chunk. */
1043 void *extract_chunk_arg;
/* Chunk callback that checksums data on its way to the real consumer.
 *
 * Each chunk is first folded into the SHA1 state kept in the extract_ctx and
 * then forwarded unchanged to ctx->extract_chunk; that callback's return
 * value is returned as-is. */
1047 extract_chunk_sha1_wrapper(const void *chunk, size_t chunk_size,
1050 struct extract_ctx *ctx = _ctx;
1052 sha1_update(&ctx->sha_ctx, chunk, chunk_size);
1053 return ctx->extract_chunk(chunk, chunk_size, ctx->extract_chunk_arg);
1056 /* Extracts the first @size bytes of a WIM resource to somewhere. In the
1057 * process, the SHA1 message digest of the resource is checked if the full
1058 * resource is being extracted.
1060 * @extract_chunk is a function that is called to extract each chunk of the
1063 extract_wim_resource(const struct wim_lookup_table_entry *lte,
1065 consume_data_callback_t extract_chunk,
1066 void *extract_chunk_arg)
1069 if (size == wim_resource_size(lte)) {
1071 struct extract_ctx ctx;
1072 ctx.extract_chunk = extract_chunk;
1073 ctx.extract_chunk_arg = extract_chunk_arg;
1074 sha1_init(&ctx.sha_ctx);
1075 ret = read_resource_prefix(lte, size,
1076 extract_chunk_sha1_wrapper,
1077 wim_resource_chunk_size(lte),
1080 u8 hash[SHA1_HASH_SIZE];
1081 sha1_final(hash, &ctx.sha_ctx);
1082 if (!hashes_equal(hash, lte->hash)) {
1083 if (wimlib_print_errors) {
1084 ERROR("Invalid SHA1 message digest "
1085 "on the following WIM resource:");
1086 print_lookup_table_entry(lte, stderr);
1087 if (lte->resource_location == RESOURCE_IN_WIM)
1088 ERROR("The WIM file appears to be corrupt!");
1090 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
1095 ret = read_resource_prefix(lte, size, extract_chunk,
1096 wim_resource_chunk_size(lte),
1097 extract_chunk_arg, 0);
/* Chunk callback that writes @len bytes from @buf to a file descriptor.
 *
 * @_fd_p is the callback context and is interpreted as a struct filedes
 * pointer.  NOTE(review): the error message is presumably emitted only when
 * full_write() fails, and its result returned; the test and the return
 * statement are not shown here -- confirm. */
1103 extract_wim_chunk_to_fd(const void *buf, size_t len, void *_fd_p)
1105 struct filedes *fd = _fd_p;
1106 int ret = full_write(fd, buf, len);
1108 ERROR_WITH_ERRNO("Error writing to file descriptor");
/* Extract the first @size bytes of the resource @lte to the file descriptor
 * @fd by running extract_wim_resource() with extract_wim_chunk_to_fd() as the
 * chunk callback and @fd as its context. */
1113 extract_wim_resource_to_fd(const struct wim_lookup_table_entry *lte,
1114 struct filedes *fd, u64 size)
1116 return extract_wim_resource(lte, size, extract_wim_chunk_to_fd, fd);
/* Chunk callback that feeds @len bytes from @buf into the SHA1 state passed
 * as @ctx; used by sha1_resource(). */
1121 sha1_chunk(const void *buf, size_t len, void *ctx)
1123 sha1_update(ctx, buf, len);
1127 /* Calculate the SHA1 message digest of a stream. */
/* sha1_resource(): hash the complete contents of @lte and store the digest in
 * lte->hash.
 *
 * The resource is streamed through sha1_chunk() in pieces of
 * wim_resource_chunk_size(lte) bytes.  NOTE(review): presumably the digest is
 * finalized only when the read succeeded and the read's status is returned;
 * the test and the return statement are not shown here -- confirm. */
1129 sha1_resource(struct wim_lookup_table_entry *lte)
1134 sha1_init(&sha_ctx);
1135 ret = read_resource_prefix(lte, wim_resource_size(lte),
1136 sha1_chunk, wim_resource_chunk_size(lte),
1139 sha1_final(lte->hash, &sha_ctx);
1143 /* Translates a WIM resource entry from the on-disk format to an in-memory
1146 get_resource_entry(const struct resource_entry_disk *disk_entry,
1147 struct resource_entry *entry)
1149 /* Note: disk_entry may not be 8 byte aligned--- in that case, the
1150 * offset and original_size members will be unaligned. (This should be
1151 * okay since `struct resource_entry_disk' is declared as packed.) */
1153 /* Read the size and flags into a bitfield portably... */
/* The size is assembled from 7 separate bytes, least significant first,
 * giving a 56-bit value.  Each byte is widened to u64 before shifting so
 * that the upper bytes are not lost. */
1154 entry->size = (((u64)disk_entry->size[0] << 0) |
1155 ((u64)disk_entry->size[1] << 8) |
1156 ((u64)disk_entry->size[2] << 16) |
1157 ((u64)disk_entry->size[3] << 24) |
1158 ((u64)disk_entry->size[4] << 32) |
1159 ((u64)disk_entry->size[5] << 40) |
1160 ((u64)disk_entry->size[6] << 48));
1161 entry->flags = disk_entry->flags;
/* The two 64-bit fields are little-endian on disk. */
1162 entry->offset = le64_to_cpu(disk_entry->offset);
1163 entry->original_size = le64_to_cpu(disk_entry->original_size);
1165 /* offset and original_size are truncated to 62 bits to avoid possible
1166 * overflows, when converting to a signed 64-bit integer (off_t) or when
1167 * adding size or original_size. This is okay since no one would ever
1168 * actually have a WIM bigger than 4611686018427387903 bytes... */
/* 0xc000... selects the top two bits; when either is set, warn and then
 * mask them off with 0x3fff.... */
1169 if (entry->offset & 0xc000000000000000ULL) {
1170 WARNING("Truncating offset in resource entry");
1171 entry->offset &= 0x3fffffffffffffffULL;
1173 if (entry->original_size & 0xc000000000000000ULL) {
1174 WARNING("Truncating original_size in resource entry");
1175 entry->original_size &= 0x3fffffffffffffffULL;
1179 /* Translates a WIM resource entry from an in-memory format into the on-disk
1182 put_resource_entry(const struct resource_entry *entry,
1183 struct resource_entry_disk *disk_entry)
1185 /* Note: disk_entry may not be 8 byte aligned--- in that case, the
1186 * offset and original_size members will be unaligned. (This should be
1187 * okay since `struct resource_entry_disk' is declared as packed.) */
1188 u64 size = entry->size;
/* Store the size as 7 bytes, least significant first; this is the inverse of
 * the assembly done in get_resource_entry().  Assumes size[] is an array of
 * single bytes, so each store keeps only the low 8 bits of the shifted value
 * -- TODO confirm against the struct declaration. */
1190 disk_entry->size[0] = size >> 0;
1191 disk_entry->size[1] = size >> 8;
1192 disk_entry->size[2] = size >> 16;
1193 disk_entry->size[3] = size >> 24;
1194 disk_entry->size[4] = size >> 32;
1195 disk_entry->size[5] = size >> 40;
1196 disk_entry->size[6] = size >> 48;
1197 disk_entry->flags = entry->flags;
/* The two 64-bit fields are written in little-endian byte order. */
1198 disk_entry->offset = cpu_to_le64(entry->offset);
1199 disk_entry->original_size = cpu_to_le64(entry->original_size);