/*
 * Read uncompressed and compressed metadata and file resources from a WIM file.
 */

/*
 * Copyright (C) 2012, 2013 Eric Biggers
 *
 * This file is part of wimlib, a library for working with WIM files.
 *
 * wimlib is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 3 of the License, or (at your option) any later
 * version.
 *
 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * wimlib; if not, see http://www.gnu.org/licenses/.
 */
#include "wimlib/dentry.h"
#include "wimlib/endianness.h"
#include "wimlib/error.h"
#include "wimlib/file_io.h"
#include "wimlib/lookup_table.h"
#include "wimlib/resource.h"
#include "wimlib/sha1.h"

#ifdef __WIN32__
/* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */
#  include "wimlib/win32.h"
#endif

/* NOTE(review): guard macro name assumed to be WITH_NTFS_3G -- confirm against
 * the build configuration. */
#ifdef WITH_NTFS_3G
/* for read_ntfs_file_prefix() */
#  include "wimlib/ntfs_3g.h"
#endif

#include <errno.h>	/* errno, EINVAL */
#include <fcntl.h>	/* open(), O_RDONLY */
#include <unistd.h>	/* close() */
/*
 * Compressed resources
 *
 * A compressed resource in a WIM consists of a number of consecutive LZX or
 * XPRESS-compressed chunks, each of which decompresses to 32768 bytes of data,
 * except possibly the last, which always decompresses to any remaining bytes.
 * In addition, immediately before the chunks, a table (the "chunk table")
 * provides the offset, in bytes relative to the end of the chunk table, of the
 * start of each compressed chunk, except for the first chunk which is omitted
 * as it always has an offset of 0.  Therefore, a compressed resource with N
 * chunks will have a chunk table with N - 1 entries.
 *
 * Additional information:
 *
 * - Entries in the chunk table are 4 bytes each, except if the uncompressed
 *   size of the resource is greater than 4 GiB, in which case the entries in
 *   the chunk table are 8 bytes each.  In either case, the entries are unsigned
 *   little-endian integers.
 *
 * - The chunk table is included in the compressed size of the resource provided
 *   in the corresponding entry in the WIM's stream lookup table.
 *
 * - The compressed size of a chunk is never greater than the uncompressed size.
 *   From the compressor's point of view, chunks that would have compressed to a
 *   size greater than or equal to their original size are in fact stored
 *   uncompressed.  From the decompressor's point of view, chunks with
 *   compressed size equal to their uncompressed size are in fact uncompressed.
 *
 * Furthermore, wimlib supports its own "pipable" WIM format, and for this the
 * structure of compressed resources was modified to allow piped reading and
 * writing.  To make sequential writing possible, the chunk table is placed
 * after the chunks rather than before the chunks, and to make sequential
 * reading possible, each chunk is prefixed with a 4-byte header giving its
 * compressed size as a 32-bit, unsigned, little-endian integer (less than or
 * equal to 32768).  Otherwise the details are the same.
 */
94 typedef int (*decompress_func_t)(const void *, unsigned, void *, unsigned);
96 static decompress_func_t
97 get_decompress_func(int ctype)
100 case WIMLIB_COMPRESSION_TYPE_LZX:
101 return wimlib_lzx_decompress;
102 case WIMLIB_COMPRESSION_TYPE_XPRESS:
103 return wimlib_xpress_decompress;
111 * read_compressed_resource()-
113 * Read data from a compressed resource being read from a seekable WIM file.
114 * The resource may be either pipable or non-pipable.
119 * Just do a normal read, decompressing the data if necessary.
121 * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS:
122 * Read the raw contents of the compressed chunks of the compressed
123 * resource. For pipable resources, this does *not* include the chunk
124 * headers. If a callback function is being used, it will be called once
125 * for each compressed chunk. For non-pipable resources, this mode
126 * excludes the chunk table. For pipable resources, this mode excludes the
127 * stream and chunk headers.
130 read_compressed_resource(const struct wim_lookup_table_entry * const lte,
131 u64 size, const consume_data_callback_t cb,
132 const u32 in_chunk_size, void * const ctx_or_buf,
133 const int flags, const u64 offset)
137 const u32 orig_chunk_size = wim_resource_chunk_size(lte);
138 const u32 orig_chunk_order = bsr32(orig_chunk_size);
140 wimlib_assert(is_power_of_2(orig_chunk_size));
141 wimlib_assert(cb == NULL || is_power_of_2(in_chunk_size));
143 /* Currently, reading raw compressed chunks is only guaranteed to work
144 * correctly when the full resource is requested. Furthermore, in such
145 * cases the requested size is specified as the compressed size, but
146 * here we change it to an uncompressed size to avoid confusing the rest
147 * of this function. */
148 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
149 wimlib_assert(offset == 0);
150 wimlib_assert(size == lte->resource_entry.size);
151 wimlib_assert(wim_resource_chunk_size(lte) == in_chunk_size);
152 size = wim_resource_size(lte);
155 wimlib_assert(offset + size <= wim_resource_size(lte));
157 /* Handle the trivial case. */
161 u64 *chunk_offsets = NULL;
164 void *compressed_buf = NULL;
165 bool chunk_offsets_malloced = false;
166 bool out_buf_malloced = false;
167 bool tmp_buf_malloced = false;
168 bool compressed_buf_malloced = false;
169 const size_t stack_max = 32768;
171 /* Get the appropriate decompression function. */
172 const decompress_func_t decompress =
173 get_decompress_func(wim_resource_compression_type(lte));
175 /* Get the file descriptor for the WIM. */
176 struct filedes * const in_fd = <e->wim->in_fd;
178 /* Calculate the number of chunks the resource is divided into. */
179 const u64 num_chunks = wim_resource_chunks(lte);
181 /* Calculate the number of entries in the chunk table; it's one less
182 * than the number of chunks, since the first chunk has no entry. */
183 const u64 num_chunk_entries = num_chunks - 1;
185 /* Calculate the 0-based index of the chunk at which the read starts.
187 const u64 start_chunk = offset >> orig_chunk_order;
189 /* Calculate the offset, within the start chunk, of the first byte of
191 const u32 start_offset_in_chunk = offset & (orig_chunk_size - 1);
193 /* Calculate the index of the chunk that contains the last byte of the
195 const u64 end_chunk = (offset + size - 1) >> orig_chunk_order;
197 /* Calculate the offset, within the end chunk, of the last byte of the
199 const u32 end_offset_in_chunk = (offset + size - 1) & (orig_chunk_size - 1);
201 /* Calculate the number of chunk entries are actually needed to read the
202 * requested part of the resource. Include an entry for the first chunk
203 * even though that doesn't exist in the on-disk table, but take into
204 * account that if the last chunk required for the read is not the last
205 * chunk of the resource, an extra chunk entry is needed so that the
206 * compressed size of the last chunk of the read can be determined. */
207 const u64 num_alloc_chunk_entries = end_chunk - start_chunk +
208 1 + (end_chunk != num_chunks - 1);
210 /* Set the size of each chunk table entry based on the resource's
211 * uncompressed size. */
212 const u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
214 /* Calculate the size, in bytes, of the full chunk table. */
215 const u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
217 /* Allocate a buffer to hold a subset of the chunk table. It will only
218 * contain offsets for the chunks that are actually needed for this
219 * read. For speed, allocate the buffer on the stack unless it's too
221 if (num_alloc_chunk_entries <= stack_max) {
222 chunk_offsets = alloca(num_alloc_chunk_entries * sizeof(u64));
223 chunk_offsets_malloced = false;
225 chunk_offsets = MALLOC(num_alloc_chunk_entries * sizeof(u64));
226 if (!chunk_offsets) {
227 ERROR("Failed to allocate chunk table "
228 "with %"PRIu64" entries", num_alloc_chunk_entries);
229 return WIMLIB_ERR_NOMEM;
231 chunk_offsets_malloced = true;
234 /* Set the implicit offset of the first chunk if it's included in the
236 if (start_chunk == 0)
237 chunk_offsets[0] = 0;
239 /* Calculate the index of the first needed entry in the chunk table. */
240 const u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
242 /* Calculate the number of entries that need to be read from the chunk
244 const u64 num_needed_chunk_entries = (start_chunk == 0) ?
245 num_alloc_chunk_entries - 1 : num_alloc_chunk_entries;
247 /* Calculate the number of bytes of data that need to be read from the
249 const size_t chunk_table_needed_size =
250 num_needed_chunk_entries * chunk_entry_size;
251 if ((u64)chunk_table_needed_size !=
252 num_needed_chunk_entries * chunk_entry_size)
254 ERROR("Compressed read request too large to fit into memory!");
255 ret = WIMLIB_ERR_NOMEM;
256 goto out_free_memory;
259 /* Calculate the byte offset, in the WIM file, of the first chunk table
260 * entry to read. Take into account that if the WIM file is in the
261 * special "pipable" format, then the chunk table is at the end of the
262 * resource, not the beginning. */
263 const u64 file_offset_of_needed_chunk_entries =
264 lte->resource_entry.offset
265 + (start_table_idx * chunk_entry_size)
266 + (lte->is_pipable ? (lte->resource_entry.size - chunk_table_size) : 0);
268 /* Read the needed chunk table entries into the end of the chunk_offsets
270 void * const chunk_tab_data = (u8*)&chunk_offsets[num_alloc_chunk_entries] -
271 chunk_table_needed_size;
272 ret = full_pread(in_fd, chunk_tab_data, chunk_table_needed_size,
273 file_offset_of_needed_chunk_entries);
277 /* Now fill in chunk_offsets from the entries we have read in
278 * chunk_tab_data. Careful: chunk_offsets aliases chunk_tab_data, which
279 * breaks C's aliasing rules when we read 32-bit integers and store
280 * 64-bit integers. But since the operations are safe as long as the
281 * compiler doesn't mess with their order, we use the gcc may_alias
282 * extension to tell the compiler that loads from the 32-bit integers
283 * may alias stores to the 64-bit integers. */
285 typedef le64 __attribute__((may_alias)) aliased_le64_t;
286 typedef le32 __attribute__((may_alias)) aliased_le32_t;
287 u64 * const chunk_offsets_p = chunk_offsets + (start_chunk == 0);
290 if (chunk_entry_size == 4) {
291 aliased_le32_t *raw_entries = (aliased_le32_t*)chunk_tab_data;
292 for (i = 0; i < num_needed_chunk_entries; i++)
293 chunk_offsets_p[i] = le32_to_cpu(raw_entries[i]);
295 aliased_le64_t *raw_entries = (aliased_le64_t*)chunk_tab_data;
296 for (i = 0; i < num_needed_chunk_entries; i++)
297 chunk_offsets_p[i] = le64_to_cpu(raw_entries[i]);
301 /* Calculate file offset of the first chunk that needs to be read.
302 * Note: if the resource is pipable, the entries in the chunk table do
303 * *not* include the chunk headers. */
304 u64 cur_read_offset = lte->resource_entry.offset + chunk_offsets[0];
305 if (!lte->is_pipable)
306 cur_read_offset += chunk_table_size;
308 cur_read_offset += start_chunk * sizeof(struct pwm_chunk_hdr);
310 /* If using a callback function, allocate a temporary buffer that will
311 * be used to pass data to it. If writing directly to a buffer instead,
312 * arrange to write data directly into it. */
314 u8 *out_buf_end, *out_p;
316 out_buf_size = max(in_chunk_size, orig_chunk_size);
317 if (out_buf_size <= stack_max) {
318 out_buf = alloca(out_buf_size);
320 out_buf = MALLOC(out_buf_size);
321 if (out_buf == NULL) {
322 ret = WIMLIB_ERR_NOMEM;
323 goto out_free_memory;
325 out_buf_malloced = true;
329 out_buf = ctx_or_buf;
331 out_buf_end = out_buf + out_buf_size;
334 /* Unless the raw compressed data was requested, allocate a temporary
335 * buffer for reading compressed chunks, each of which can be at most
336 * orig_chunk_size - 1 bytes. This excludes compressed chunks that are
337 * a full orig_chunk_size bytes, which are actually stored uncompressed.
339 if (!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS)) {
340 if (orig_chunk_size - 1 <= stack_max) {
341 compressed_buf = alloca(orig_chunk_size - 1);
343 compressed_buf = MALLOC(orig_chunk_size - 1);
344 if (compressed_buf == NULL) {
345 ret = WIMLIB_ERR_NOMEM;
346 goto out_free_memory;
348 compressed_buf_malloced = true;
352 /* Allocate yet another temporary buffer, this one for reading partial
354 if (start_offset_in_chunk != 0 ||
355 (end_offset_in_chunk != orig_chunk_size - 1 &&
356 offset + size != wim_resource_size(lte)))
358 if (orig_chunk_size <= stack_max) {
359 tmp_buf = alloca(orig_chunk_size);
361 tmp_buf = MALLOC(orig_chunk_size);
362 if (tmp_buf == NULL) {
363 ret = WIMLIB_ERR_NOMEM;
364 goto out_free_memory;
366 tmp_buf_malloced = true;
370 /* Read, and possibly decompress, each needed chunk, either writing the
371 * data directly into the @ctx_or_buf buffer or passing it to the @cb
372 * callback function. */
373 for (u64 i = start_chunk; i <= end_chunk; i++) {
375 /* If the resource is pipable, skip the chunk header. */
377 cur_read_offset += sizeof(struct pwm_chunk_hdr);
379 /* Calculate the sizes of the compressed chunk and of the
380 * uncompressed chunk. */
381 u32 compressed_chunk_size;
382 u32 uncompressed_chunk_size;
383 if (i != num_chunks - 1) {
384 /* Not the last chunk. Compressed size is given by
385 * difference of chunk table entries; uncompressed size
386 * is always the WIM chunk size. */
387 compressed_chunk_size = chunk_offsets[i + 1 - start_chunk] -
388 chunk_offsets[i - start_chunk];
389 uncompressed_chunk_size = orig_chunk_size;
391 /* Last chunk. Compressed size is the remaining size in
392 * the compressed resource; uncompressed size is the
393 * remaining size in the uncompressed resource. */
394 compressed_chunk_size = lte->resource_entry.size -
396 chunk_offsets[i - start_chunk];
398 compressed_chunk_size -= num_chunks *
399 sizeof(struct pwm_chunk_hdr);
401 if ((wim_resource_size(lte) & (orig_chunk_size - 1)) == 0)
402 uncompressed_chunk_size = orig_chunk_size;
404 uncompressed_chunk_size = wim_resource_size(lte) &
405 (orig_chunk_size - 1);
408 /* Calculate how much of this chunk needs to be read. */
410 u32 partial_chunk_size;
411 u32 start_offset = 0;
412 u32 end_offset = orig_chunk_size - 1;
414 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
415 partial_chunk_size = compressed_chunk_size;
417 if (i == start_chunk)
418 start_offset = start_offset_in_chunk;
421 end_offset = end_offset_in_chunk;
423 partial_chunk_size = end_offset + 1 - start_offset;
426 if (compressed_chunk_size == uncompressed_chunk_size ||
427 (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS))
429 /* Chunk stored uncompressed, or reading raw chunk data. */
430 ret = full_pread(in_fd,
433 cur_read_offset + start_offset);
437 /* Compressed chunk and not doing raw read. */
440 /* Read the compressed data into compressed_buf. */
441 ret = full_pread(in_fd,
443 compressed_chunk_size,
448 /* For partial chunks we must buffer the uncompressed
449 * data because we don't need all of it. */
450 if (partial_chunk_size == uncompressed_chunk_size)
455 /* Decompress the chunk. */
456 ret = (*decompress)(compressed_buf,
457 compressed_chunk_size,
459 uncompressed_chunk_size);
461 ERROR("Failed to decompress data.");
462 ret = WIMLIB_ERR_DECOMPRESSION;
464 goto out_free_memory;
466 if (partial_chunk_size != uncompressed_chunk_size)
467 memcpy(out_p, tmp_buf + start_offset,
471 out_p += partial_chunk_size;
474 /* Feed the data to the callback function. */
475 wimlib_assert(offset == 0);
477 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
478 ret = cb(out_buf, out_p - out_buf, ctx_or_buf);
480 goto out_free_memory;
483 } else if (i == end_chunk || out_p == out_buf_end) {
487 for (p = out_buf; p != out_p; p += bytes_sent) {
488 bytes_sent = min(in_chunk_size, out_p - p);
489 ret = cb(p, bytes_sent, ctx_or_buf);
491 goto out_free_memory;
496 cur_read_offset += compressed_chunk_size;
501 if (chunk_offsets_malloced)
503 if (out_buf_malloced)
505 if (compressed_buf_malloced)
506 FREE(compressed_buf);
507 if (tmp_buf_malloced)
512 ERROR_WITH_ERRNO("Error reading compressed file resource");
513 goto out_free_memory;
516 /* Skip over the chunk table at the end of pipable, compressed resource being
517 * read from a pipe. */
519 skip_chunk_table(const struct wim_lookup_table_entry *lte,
520 struct filedes *in_fd)
522 u64 num_chunk_entries = wim_resource_chunks(lte) - 1;
523 u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
524 u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
527 if (num_chunk_entries != 0) {
529 ret = full_pread(in_fd, &dummy, 1,
530 in_fd->offset + chunk_table_size - 1);
537 /* Read and decompress data from a compressed, pipable resource being read from
540 read_pipable_resource(const struct wim_lookup_table_entry *lte,
541 u64 size, consume_data_callback_t cb,
542 u32 in_chunk_size, void *ctx_or_buf,
543 int flags, u64 offset)
545 struct filedes *in_fd;
546 decompress_func_t decompress;
548 const u32 orig_chunk_size = wim_resource_chunk_size(lte);
549 u8 cchunk[orig_chunk_size - 1];
552 u8 *out_buf, *out_buf_end, *out_p;
554 out_buf_size = max(in_chunk_size, orig_chunk_size);
555 out_buf = alloca(out_buf_size);
558 out_buf = ctx_or_buf;
560 out_buf_end = out_buf + out_buf_size;
563 /* Get pointers to appropriate decompression function and the input file
565 decompress = get_decompress_func(wim_resource_compression_type(lte));
566 in_fd = <e->wim->in_fd;
568 /* This function currently assumes the entire resource is being read at
569 * once and that the raw compressed data isn't being requested. This is
570 * based on the fact that this function currently only gets called
571 * during the operation of wimlib_extract_image_from_pipe(). */
572 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW));
573 wimlib_assert(offset == 0);
574 wimlib_assert(size == wim_resource_size(lte));
575 wimlib_assert(in_fd->offset == lte->resource_entry.offset);
578 for (offset = 0; offset < size; offset += chunk_usize) {
579 struct pwm_chunk_hdr chunk_hdr;
582 /* Calculate uncompressed size of next chunk. */
583 chunk_usize = min(orig_chunk_size, size - offset);
585 /* Read the compressed size of the next chunk from the chunk
587 ret = full_read(in_fd, &chunk_hdr, sizeof(chunk_hdr));
591 chunk_csize = le32_to_cpu(chunk_hdr.compressed_size);
593 if (chunk_csize > orig_chunk_size) {
595 ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
599 /* Read chunk data. */
600 ret = full_read(in_fd, cchunk, chunk_csize);
604 if (flags & WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY)
607 /* Decompress chunk if needed. Uncompressed size same
608 * as compressed size means the chunk is uncompressed.
610 if (chunk_csize == chunk_usize) {
611 memcpy(out_p, cchunk, chunk_usize);
613 ret = (*decompress)(cchunk, chunk_csize,
617 ret = WIMLIB_ERR_DECOMPRESSION;
621 out_p += chunk_usize;
623 /* Feed the uncompressed data into the callback function or copy
624 * it into the provided buffer. */
625 if (cb && (out_p == out_buf_end ||
626 offset + chunk_usize == size))
631 for (p = out_buf; p != out_p; p += bytes_sent) {
632 bytes_sent = min(in_chunk_size, out_p - p);
633 ret = cb(p, bytes_sent, ctx_or_buf);
641 ret = skip_chunk_table(lte, in_fd);
647 ERROR_WITH_ERRNO("Error reading compressed file resource");
651 ERROR("Compressed file resource is invalid");
656 * read_partial_wim_resource()-
658 * Read a range of data from a uncompressed or compressed resource in a WIM
659 * file. Data is written into a buffer or fed into a callback function, as
660 * documented in read_resource_prefix().
665 * Just do a normal read, decompressing the data if necessary. @size and
666 * @offset are interpreted relative to the uncompressed contents of the
669 * WIMLIB_READ_RESOURCE_FLAG_RAW_FULL:
670 * Only valid when the resource is compressed: Read the raw contents of
671 * the compressed resource. If the resource is non-pipable, this includes
672 * the chunk table as well as the compressed chunks. If the resource is
673 * pipable, this includes the compressed chunks--- including the chunk
674 * headers--- and the chunk table. The stream header is still *not*
677 * In this mode, @offset is relative to the beginning of the raw contents
678 * of the compressed resource--- that is, the chunk table if the resource
679 * is non-pipable, or the header for the first compressed chunk if the
680 * resource is pipable. @size is the number of raw bytes to read, which
681 * must not overrun the end of the resource. For example, if @offset is 0,
682 * then @size can be at most the raw size of the compressed resource
683 * (@lte->resource_entry.size).
685 * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS:
686 * Only valid when the resource is compressed and is not being read from a
687 * pipe: Read the raw contents of the compressed chunks of the compressed
688 * resource. For pipable resources, this does *not* include the chunk
689 * headers. If a callback function is being used, it will be called once
690 * for each compressed chunk. The chunk table is excluded. Also, for
691 * pipable resources, the stream and chunk headers are excluded. In this
692 * mode, @size must be exactly the raw size of the compressed resource
693 * (@lte->resource_entry.size) and @offset must be 0.
695 * WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY:
696 * Only valid when the resource is being read from a pipe: Skip over the
697 * requested data rather than feed it to the callback function or write it
698 * into the buffer. No decompression is done.
699 * WIMLIB_READ_RESOURCE_FLAG_RAW_* may not be combined with this flag.
700 * @offset must be 0 and @size must be the uncompressed size of the
704 * WIMLIB_ERR_SUCCESS (0)
705 * WIMLIB_ERR_READ (errno set)
706 * WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to 0)
707 * WIMLIB_ERR_NOMEM (errno set to ENOMEM)
708 * WIMLIB_ERR_DECOMPRESSION (errno set to EINVAL)
709 * WIMLIB_ERR_INVALID_PIPABLE_WIM (errno set to EINVAL)
711 * or other error code returned by the @cb function.
714 read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
715 u64 size, consume_data_callback_t cb,
717 void *ctx_or_buf, int flags, u64 offset)
719 struct filedes *in_fd;
722 /* Make sure the resource is actually located in a WIM file and is not
724 wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
726 /* If a callback was specified, in_chunk_size must be a power of 2 (and
728 wimlib_assert(cb == NULL || is_power_of_2(in_chunk_size));
730 /* If a callback was specified, offset must be zero. */
731 wimlib_assert(cb == NULL || offset == 0);
733 /* Retrieve input file descriptor for the WIM file. */
734 in_fd = <e->wim->in_fd;
736 /* Don't allow raw reads (either full or chunks) of uncompressed
738 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW) ||
739 resource_is_compressed(<e->resource_entry));
741 /* Don't allow seek-only reads unless reading from a pipe; also don't
742 * allow combining SEEK_ONLY with either RAW flag. */
743 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY) ||
744 (!filedes_is_seekable(in_fd) &&
745 !(flags & WIMLIB_READ_RESOURCE_FLAG_RAW)));
747 DEBUG("Reading WIM resource: %"PRIu64" @ +%"PRIu64" "
748 "from %"PRIu64" @ +%"PRIu64" (readflags 0x%08x, resflags 0x%02x%s)",
750 lte->resource_entry.original_size, lte->resource_entry.offset,
751 flags, lte->resource_entry.flags,
752 (lte->is_pipable ? ", pipable" : ""));
754 if ((flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) ||
755 !resource_is_compressed(<e->resource_entry))
757 /* Reading raw resource contents or reading uncompressed
759 wimlib_assert(offset + size <= lte->resource_entry.size);
760 offset += lte->resource_entry.offset;
761 if (flags & WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY) {
762 if (lte->resource_entry.size != 0) {
764 ret = full_pread(in_fd, &dummy, 1,
765 offset + lte->resource_entry.size - 1);
770 /* Send data to callback function */
771 u8 buf[min(in_chunk_size, size)];
773 size_t bytes_to_read = min(in_chunk_size, size);
774 ret = full_pread(in_fd, buf, bytes_to_read,
778 ret = cb(buf, bytes_to_read, ctx_or_buf);
781 size -= bytes_to_read;
782 offset += bytes_to_read;
785 /* Send data directly to a buffer */
786 ret = full_pread(in_fd, ctx_or_buf, size, offset);
791 } else if (lte->is_pipable && !filedes_is_seekable(in_fd)) {
792 /* Reading compressed, pipable resource from pipe. */
793 ret = read_pipable_resource(lte, size, cb,
795 ctx_or_buf, flags, offset);
797 /* Reading compressed, possibly pipable resource from seekable
799 ret = read_compressed_resource(lte, size, cb,
801 ctx_or_buf, flags, offset);
806 ERROR_WITH_ERRNO("Error reading data from WIM");
813 read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
814 size_t size, u64 offset, void *buf)
816 return read_partial_wim_resource(lte, size, NULL, 0, buf, 0, offset);
820 read_wim_resource_prefix(const struct wim_lookup_table_entry *lte,
822 consume_data_callback_t cb,
827 return read_partial_wim_resource(lte, size, cb, in_chunk_size,
828 ctx_or_buf, flags, 0);
834 read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
836 consume_data_callback_t cb,
841 const tchar *filename = lte->file_on_disk;
846 bool out_buf_malloced;
847 const size_t stack_max = 32768;
849 DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"",
850 size, lte->file_on_disk);
852 raw_fd = open(filename, O_RDONLY);
854 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", filename);
855 return WIMLIB_ERR_OPEN;
857 filedes_init(&fd, raw_fd);
858 out_buf_malloced = false;
860 /* Send data to callback function */
861 if (in_chunk_size <= stack_max) {
862 out_buf = alloca(in_chunk_size);
864 out_buf = MALLOC(in_chunk_size);
865 if (out_buf == NULL) {
866 ret = WIMLIB_ERR_NOMEM;
869 out_buf_malloced = true;
872 size_t bytes_to_read;
874 bytes_to_read = min(in_chunk_size, size);
875 ret = full_read(&fd, out_buf, bytes_to_read);
878 ret = cb(out_buf, bytes_to_read, ctx_or_buf);
881 size -= bytes_to_read;
884 /* Send data directly to a buffer */
885 ret = full_read(&fd, ctx_or_buf, size);
893 ERROR_WITH_ERRNO("Error reading \"%"TS"\"", filename);
896 if (out_buf_malloced)
900 #endif /* !__WIN32__ */
903 read_buffer_prefix(const struct wim_lookup_table_entry *lte,
904 u64 size, consume_data_callback_t cb,
906 void *ctx_or_buf, int _ignored_flags)
913 for (u64 offset = 0; offset < size; offset += chunk_size) {
914 chunk_size = min(in_chunk_size, size - offset);
915 ret = cb((const u8*)lte->attached_buffer + offset,
916 chunk_size, ctx_or_buf);
921 memcpy(ctx_or_buf, lte->attached_buffer, size);
926 typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
928 consume_data_callback_t cb,
934 * read_resource_prefix()-
936 * Read the first @size bytes from a generic "resource", which may be located in
937 * the WIM (compressed or uncompressed), in an external file, or directly in an
940 * Feed the data either to a callback function (cb != NULL, passing it
941 * ctx_or_buf), or write it directly into a buffer (cb == NULL, ctx_or_buf
942 * specifies the buffer, which must have room for @size bytes).
944 * When using a callback function, it is called with chunks up to 32768 bytes in
945 * size until the resource is exhausted.
947 * If the resource is located in a WIM file, @flags can be set as documented in
948 * read_partial_wim_resource(). Otherwise @flags are ignored.
951 read_resource_prefix(const struct wim_lookup_table_entry *lte,
952 u64 size, consume_data_callback_t cb, u32 in_chunk_size,
953 void *ctx_or_buf, int flags)
955 static const read_resource_prefix_handler_t handlers[] = {
956 [RESOURCE_IN_WIM] = read_wim_resource_prefix,
958 [RESOURCE_IN_FILE_ON_DISK] = read_win32_file_prefix,
960 [RESOURCE_IN_FILE_ON_DISK] = read_file_on_disk_prefix,
962 [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix,
964 [RESOURCE_IN_STAGING_FILE] = read_file_on_disk_prefix,
967 [RESOURCE_IN_NTFS_VOLUME] = read_ntfs_file_prefix,
970 [RESOURCE_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix,
973 wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
974 && handlers[lte->resource_location] != NULL);
975 wimlib_assert(cb == NULL || in_chunk_size > 0);
976 return handlers[lte->resource_location](lte, size, cb, in_chunk_size, ctx_or_buf, flags);
/* Read the entire resource (wim_resource_size(lte) bytes) into @buf, which the
 * caller must have sized accordingly.  Returns the result of
 * read_resource_prefix().  */
int
read_full_resource_into_buf(const struct wim_lookup_table_entry *lte,
			    void *buf)
{
	return read_resource_prefix(lte, wim_resource_size(lte), NULL, 0, buf, 0);
}
987 read_full_resource_into_alloc_buf(const struct wim_lookup_table_entry *lte,
993 if ((size_t)lte->resource_entry.original_size !=
994 lte->resource_entry.original_size)
996 ERROR("Can't read %"PRIu64" byte resource into "
997 "memory", lte->resource_entry.original_size);
998 return WIMLIB_ERR_NOMEM;
1001 buf = MALLOC(lte->resource_entry.original_size);
1003 return WIMLIB_ERR_NOMEM;
1005 ret = read_full_resource_into_buf(lte, buf);
1016 res_entry_to_data(const struct resource_entry *res_entry,
1017 WIMStruct *wim, void **buf_ret)
1020 struct wim_lookup_table_entry *lte;
1022 lte = new_lookup_table_entry();
1024 return WIMLIB_ERR_NOMEM;
1026 copy_resource_entry(<e->resource_entry, res_entry);
1028 lte->part_number = wim->hdr.part_number;
1029 lte_init_wim(lte, wim);
1031 ret = read_full_resource_into_alloc_buf(lte, buf_ret);
1032 free_lookup_table_entry(lte);
1036 struct extract_ctx {
1038 consume_data_callback_t extract_chunk;
1039 void *extract_chunk_arg;
1043 extract_chunk_sha1_wrapper(const void *chunk, size_t chunk_size,
1046 struct extract_ctx *ctx = _ctx;
1048 sha1_update(&ctx->sha_ctx, chunk, chunk_size);
1049 return ctx->extract_chunk(chunk, chunk_size, ctx->extract_chunk_arg);
1052 /* Extracts the first @size bytes of a WIM resource to somewhere. In the
1053 * process, the SHA1 message digest of the resource is checked if the full
1054 * resource is being extracted.
1056 * @extract_chunk is a function that is called to extract each chunk of the
1059 extract_wim_resource(const struct wim_lookup_table_entry *lte,
1061 consume_data_callback_t extract_chunk,
1062 void *extract_chunk_arg)
1065 if (size == wim_resource_size(lte)) {
1067 struct extract_ctx ctx;
1068 ctx.extract_chunk = extract_chunk;
1069 ctx.extract_chunk_arg = extract_chunk_arg;
1070 sha1_init(&ctx.sha_ctx);
1071 ret = read_resource_prefix(lte, size,
1072 extract_chunk_sha1_wrapper,
1073 wim_resource_chunk_size(lte),
1076 u8 hash[SHA1_HASH_SIZE];
1077 sha1_final(hash, &ctx.sha_ctx);
1078 if (!hashes_equal(hash, lte->hash)) {
1079 if (wimlib_print_errors) {
1080 ERROR("Invalid SHA1 message digest "
1081 "on the following WIM resource:");
1082 print_lookup_table_entry(lte, stderr);
1083 if (lte->resource_location == RESOURCE_IN_WIM)
1084 ERROR("The WIM file appears to be corrupt!");
1086 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
1091 ret = read_resource_prefix(lte, size, extract_chunk,
1092 wim_resource_chunk_size(lte),
1093 extract_chunk_arg, 0);
/* Extraction callback that writes a chunk to the file descriptor pointed to by
 * @_fd_p (a struct filedes *).  Logs and returns the error from full_write()
 * on failure, 0 otherwise.  */
static int
extract_wim_chunk_to_fd(const void *buf, size_t len, void *_fd_p)
{
	struct filedes *fd = _fd_p;
	int ret = full_write(fd, buf, len);
	if (ret)
		ERROR_WITH_ERRNO("Error writing to file descriptor");
	return ret;
}
1109 extract_wim_resource_to_fd(const struct wim_lookup_table_entry *lte,
1110 struct filedes *fd, u64 size)
1112 return extract_wim_resource(lte, size, extract_wim_chunk_to_fd, fd);
/* Data callback that folds @buf into the SHA1 state passed as @ctx.  Always
 * returns 0.  */
static int
sha1_chunk(const void *buf, size_t len, void *ctx)
{
	sha1_update(ctx, buf, len);
	return 0;
}
1123 /* Calculate the SHA1 message digest of a stream. */
1125 sha1_resource(struct wim_lookup_table_entry *lte)
1130 sha1_init(&sha_ctx);
1131 ret = read_resource_prefix(lte, wim_resource_size(lte),
1132 sha1_chunk, wim_resource_chunk_size(lte),
1135 sha1_final(lte->hash, &sha_ctx);
1139 /* Translates a WIM resource entry from the on-disk format to an in-memory
1142 get_resource_entry(const struct resource_entry_disk *disk_entry,
1143 struct resource_entry *entry)
1145 /* Note: disk_entry may not be 8 byte aligned--- in that case, the
1146 * offset and original_size members will be unaligned. (This should be
1147 * okay since `struct resource_entry_disk' is declared as packed.) */
1149 /* Read the size and flags into a bitfield portably... */
1150 entry->size = (((u64)disk_entry->size[0] << 0) |
1151 ((u64)disk_entry->size[1] << 8) |
1152 ((u64)disk_entry->size[2] << 16) |
1153 ((u64)disk_entry->size[3] << 24) |
1154 ((u64)disk_entry->size[4] << 32) |
1155 ((u64)disk_entry->size[5] << 40) |
1156 ((u64)disk_entry->size[6] << 48));
1157 entry->flags = disk_entry->flags;
1158 entry->offset = le64_to_cpu(disk_entry->offset);
1159 entry->original_size = le64_to_cpu(disk_entry->original_size);
1161 /* offset and original_size are truncated to 62 bits to avoid possible
1162 * overflows, when converting to a signed 64-bit integer (off_t) or when
1163 * adding size or original_size. This is okay since no one would ever
1164 * actually have a WIM bigger than 4611686018427387903 bytes... */
1165 if (entry->offset & 0xc000000000000000ULL) {
1166 WARNING("Truncating offset in resource entry");
1167 entry->offset &= 0x3fffffffffffffffULL;
1169 if (entry->original_size & 0xc000000000000000ULL) {
1170 WARNING("Truncating original_size in resource entry");
1171 entry->original_size &= 0x3fffffffffffffffULL;
1175 /* Translates a WIM resource entry from an in-memory format into the on-disk
1178 put_resource_entry(const struct resource_entry *entry,
1179 struct resource_entry_disk *disk_entry)
1181 /* Note: disk_entry may not be 8 byte aligned--- in that case, the
1182 * offset and original_size members will be unaligned. (This should be
1183 * okay since `struct resource_entry_disk' is declared as packed.) */
1184 u64 size = entry->size;
1186 disk_entry->size[0] = size >> 0;
1187 disk_entry->size[1] = size >> 8;
1188 disk_entry->size[2] = size >> 16;
1189 disk_entry->size[3] = size >> 24;
1190 disk_entry->size[4] = size >> 32;
1191 disk_entry->size[5] = size >> 40;
1192 disk_entry->size[6] = size >> 48;
1193 disk_entry->flags = entry->flags;
1194 disk_entry->offset = cpu_to_le64(entry->offset);
1195 disk_entry->original_size = cpu_to_le64(entry->original_size);