4 * Read uncompressed and compressed metadata and file resources from a WIM file.
8 * Copyright (C) 2012, 2013 Eric Biggers
10 * This file is part of wimlib, a library for working with WIM files.
12 * wimlib is free software; you can redistribute it and/or modify it under the
13 * terms of the GNU General Public License as published by the Free Software
14 * Foundation; either version 3 of the License, or (at your option) any later
17 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License along with
22 * wimlib; if not, see http://www.gnu.org/licenses/.
30 #include "wimlib/dentry.h"
31 #include "wimlib/endianness.h"
32 #include "wimlib/error.h"
33 #include "wimlib/file_io.h"
34 #include "wimlib/lookup_table.h"
35 #include "wimlib/resource.h"
36 #include "wimlib/sha1.h"
39 /* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */
40 # include "wimlib/win32.h"
44 /* for read_ntfs_file_prefix() */
45 # include "wimlib/ntfs_3g.h"
/*
 * Compressed resources
 *
 * A compressed resource in a WIM consists of a number of consecutive LZX or
 * XPRESS-compressed chunks, each of which decompresses to 32768 bytes of data,
 * except possibly the last, which always decompresses to any remaining bytes.
 * In addition, immediately before the chunks, a table (the "chunk table")
 * provides the offset, in bytes relative to the end of the chunk table, of the
 * start of each compressed chunk, except for the first chunk which is omitted
 * as it always has an offset of 0.  Therefore, a compressed resource with N
 * chunks will have a chunk table with N - 1 entries.
 *
 * Additional information:
 *
 * - Entries in the chunk table are 4 bytes each, except if the uncompressed
 *   size of the resource is greater than 4 GiB, in which case the entries in
 *   the chunk table are 8 bytes each.  In either case, the entries are unsigned
 *   little-endian integers.
 *
 * - The chunk table is included in the compressed size of the resource provided
 *   in the corresponding entry in the WIM's stream lookup table.
 *
 * - The compressed size of a chunk is never greater than the uncompressed size.
 *   From the compressor's point of view, chunks that would have compressed to a
 *   size greater than or equal to their original size are in fact stored
 *   uncompressed.  From the decompresser's point of view, chunks with
 *   compressed size equal to their uncompressed size are in fact uncompressed.
 *
 * Furthermore, wimlib supports its own "pipable" WIM format, and for this the
 * structure of compressed resources was modified to allow piped reading and
 * writing.  To make sequential writing possible, the chunk table is placed
 * after the chunks rather than before the chunks, and to make sequential
 * reading possible, each chunk is prefixed with a 4-byte header giving its
 * compressed size as a 32-bit, unsigned, little-endian integer (less than or
 * equal to 32768).  Otherwise the details are the same.
 */
94 typedef int (*decompress_func_t)(const void *, unsigned, void *, unsigned);
96 static decompress_func_t
97 get_decompress_func(int ctype)
99 if (ctype == WIMLIB_COMPRESSION_TYPE_LZX)
100 return wimlib_lzx_decompress;
102 return wimlib_xpress_decompress;
106 * read_compressed_resource()-
108 * Read data from a compressed resource being read from a seekable WIM file.
109 * The resource may be either pipable or non-pipable.
114 * Just do a normal read, decompressing the data if necessary.
116 * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS:
117 * Read the raw contents of the compressed chunks of the compressed
118 * resource. For pipable resources, this does *not* include the chunk
119 * headers. If a callback function is being used, it will be called once
120 * for each compressed chunk. For non-pipable resources, this mode
121 * excludes the chunk table. For pipable resources, this mode excludes the
122 * stream and chunk headers.
125 read_compressed_resource(const struct wim_lookup_table_entry *lte,
126 u64 size, consume_data_callback_t cb,
127 void *ctx_or_buf, int flags, u64 offset)
131 /* Currently, reading raw compressed chunks is only guaranteed to work
132 * correctly when the full resource is requested. Furthermore, in such
133 * cases the requested size is specified as the compressed size, but
134 * here we change it to an uncompressed size to avoid confusing the rest
135 * of this function. */
136 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
137 wimlib_assert(offset == 0);
138 wimlib_assert(size == lte->resource_entry.size);
139 size = wim_resource_size(lte);
142 wimlib_assert(offset + size <= wim_resource_size(lte));
144 /* Handle the trivial case. */
148 /* Get the appropriate decompression function. */
149 decompress_func_t decompress =
150 get_decompress_func(wim_resource_compression_type(lte));
152 /* Get the file descriptor for the WIM. */
153 struct filedes *in_fd = <e->wim->in_fd;
155 /* Calculate the number of chunks the resource is divided into. */
156 u64 num_chunks = wim_resource_chunks(lte);
158 /* Calculate the number of entries in the chunk table; it's one less
159 * than the number of chunks, since the first chunk has no entry. */
160 u64 num_chunk_entries = num_chunks - 1;
162 /* Calculate the 0-based index of the chunk at which the read starts.
164 u64 start_chunk = offset / WIM_CHUNK_SIZE;
166 /* Calculate the offset, within the start chunk, of the first byte of
168 u64 start_offset_in_chunk = offset % WIM_CHUNK_SIZE;
170 /* Calculate the index of the chunk that contains the last byte of the
172 u64 end_chunk = (offset + size - 1) / WIM_CHUNK_SIZE;
174 /* Calculate the offset, within the end chunk, of the last byte of the
176 u64 end_offset_in_chunk = (offset + size - 1) % WIM_CHUNK_SIZE;
178 /* Calculate the number of chunk entries are actually needed to read the
179 * requested part of the resource. Include an entry for the first chunk
180 * even though that doesn't exist in the on-disk table, but take into
181 * account that if the last chunk required for the read is not the last
182 * chunk of the resource, an extra chunk entry is needed so that the
183 * compressed size of the last chunk of the read can be determined. */
184 u64 num_alloc_chunk_entries = end_chunk - start_chunk + 1;
185 if (end_chunk != num_chunks - 1)
186 num_alloc_chunk_entries++;
188 /* Set the size of each chunk table entry based on the resource's
189 * uncompressed size. */
190 u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
192 /* Calculate the size, in bytes, of the full chunk table. */
193 u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
195 /* Allocate a buffer to hold a subset of the chunk table. It will only
196 * contain offsets for the chunks that are actually needed for this
197 * read. For speed, allocate the buffer on the stack unless it's too
200 bool chunk_offsets_malloced;
201 if (num_alloc_chunk_entries < 1024) {
202 chunk_offsets = alloca(num_alloc_chunk_entries * sizeof(u64));
203 chunk_offsets_malloced = false;
205 chunk_offsets = malloc(num_alloc_chunk_entries * sizeof(u64));
206 if (!chunk_offsets) {
207 ERROR("Failed to allocate chunk table "
208 "with %"PRIu64" entries", num_alloc_chunk_entries);
209 return WIMLIB_ERR_NOMEM;
211 chunk_offsets_malloced = true;
214 /* Set the implicit offset of the first chunk if it's included in the
216 if (start_chunk == 0)
217 chunk_offsets[0] = 0;
219 /* Calculate the index of the first needed entry in the chunk table. */
220 u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
222 /* Calculate the number of entries that need to be read from the chunk
224 u64 num_needed_chunk_entries = (start_chunk == 0) ?
225 num_alloc_chunk_entries - 1 : num_alloc_chunk_entries;
227 /* Calculate the number of bytes of data that need to be read from the
229 size_t chunk_table_needed_size =
230 num_needed_chunk_entries * chunk_entry_size;
231 if ((u64)chunk_table_needed_size !=
232 num_needed_chunk_entries * chunk_entry_size)
234 ERROR("Compressed read request too large to fit into memory!");
235 ret = WIMLIB_ERR_NOMEM;
236 goto out_free_chunk_offsets;
239 /* Calculate the byte offset, in the WIM file, of the first chunk table
240 * entry to read. Take into account that if the WIM file is in the
241 * special "pipable" format, then the chunk table is at the end of the
242 * resource, not the beginning. */
243 u64 file_offset_of_needed_chunk_entries =
244 lte->resource_entry.offset + (start_table_idx *
247 file_offset_of_needed_chunk_entries += lte->resource_entry.size -
250 /* Read the needed chunk table entries into the end of the chunk_offsets
252 void *chunk_tab_data = (u8*)&chunk_offsets[num_alloc_chunk_entries] -
253 chunk_table_needed_size;
254 ret = full_pread(in_fd, chunk_tab_data, chunk_table_needed_size,
255 file_offset_of_needed_chunk_entries);
259 /* Now fill in chunk_offsets from the entries we have read in
260 * chunk_tab_data. Careful: chunk_offsets aliases chunk_tab_data, which
261 * breaks C's aliasing rules when we read 32-bit integers and store
262 * 64-bit integers. But since the operations are safe as long as the
263 * compiler doesn't mess with their order, we use the gcc may_alias
264 * extension to tell the compiler that loads from the 32-bit integers
265 * may alias stores to the 64-bit integers. */
267 typedef le64 __attribute__((may_alias)) aliased_le64_t;
268 typedef le32 __attribute__((may_alias)) aliased_le32_t;
269 u64 *chunk_offsets_p = chunk_offsets;
272 if (start_chunk == 0)
275 if (chunk_entry_size == 4) {
276 aliased_le32_t *raw_entries = (aliased_le32_t*)chunk_tab_data;
277 for (i = 0; i < num_needed_chunk_entries; i++)
278 chunk_offsets_p[i] = le32_to_cpu(raw_entries[i]);
280 aliased_le64_t *raw_entries = (aliased_le64_t*)chunk_tab_data;
281 for (i = 0; i < num_needed_chunk_entries; i++)
282 chunk_offsets_p[i] = le64_to_cpu(raw_entries[i]);
286 /* Calculate file offset of the first chunk that needs to be read. N.B.
287 * if the resource is pipable, the entries in the chunk table do *not*
288 * include the chunk headers. */
289 u64 cur_read_offset = lte->resource_entry.offset + chunk_offsets[0];
290 if (!lte->is_pipable)
291 cur_read_offset += chunk_table_size;
293 cur_read_offset += start_chunk *
294 sizeof(struct pwm_chunk_hdr);
296 /* If using a callback function, allocate a temporary buffer that will
297 * be used to pass data to it. If writing directly to a buffer instead,
298 * arrange to write data directly into it. */
301 out_p = alloca(WIM_CHUNK_SIZE);
305 /* Unless the raw compressed data was requested, allocate a temporary
306 * buffer for reading compressed chunks, each of which can be at most
307 * WIM_CHUNK_SIZE - 1 bytes. This excludes compressed chunks that are a
308 * full WIM_CHUNK_SIZE bytes, which are handled separately. */
309 void *compressed_buf;
310 if (!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS))
311 compressed_buf = alloca(WIM_CHUNK_SIZE - 1);
313 /* Read, and possibly decompress, each needed chunk, either writing the
314 * data directly into the @ctx_or_buf buffer or passing it to the @cb
315 * callback function. */
316 for (u64 i = start_chunk; i <= end_chunk; i++) {
318 /* If the resource is pipable, skip the chunk header. */
320 cur_read_offset += sizeof(struct pwm_chunk_hdr);
322 /* Calculate the sizes of the compressed chunk and of the
323 * uncompressed chunk. */
324 unsigned compressed_chunk_size;
325 unsigned uncompressed_chunk_size;
326 if (i != num_chunks - 1) {
327 /* Not the last chunk. Compressed size is given by
328 * difference of chunk table entries; uncompressed size
329 * is always 32768 bytes. */
330 compressed_chunk_size = chunk_offsets[i + 1 - start_chunk] -
331 chunk_offsets[i - start_chunk];
332 uncompressed_chunk_size = WIM_CHUNK_SIZE;
334 /* Last chunk. Compressed size is the remaining size in
335 * the compressed resource; uncompressed size is the
336 * remaining size in the uncompressed resource. */
337 compressed_chunk_size = lte->resource_entry.size -
339 chunk_offsets[i - start_chunk];
341 compressed_chunk_size -= num_chunks *
342 sizeof(struct pwm_chunk_hdr);
344 if (wim_resource_size(lte) % WIM_CHUNK_SIZE == 0)
345 uncompressed_chunk_size = WIM_CHUNK_SIZE;
347 uncompressed_chunk_size = wim_resource_size(lte) %
351 /* Calculate how much of this chunk needs to be read. */
353 unsigned partial_chunk_size;
354 u64 start_offset = 0;
355 u64 end_offset = WIM_CHUNK_SIZE - 1;
357 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
358 partial_chunk_size = compressed_chunk_size;
360 if (i == start_chunk)
361 start_offset = start_offset_in_chunk;
364 end_offset = end_offset_in_chunk;
366 partial_chunk_size = end_offset + 1 - start_offset;
369 if (compressed_chunk_size == uncompressed_chunk_size ||
370 (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS))
372 /* Chunk stored uncompressed, or reading raw chunk data. */
373 ret = full_pread(in_fd,
374 cb ? out_p + start_offset : out_p,
376 cur_read_offset + start_offset);
380 /* Compressed chunk and not doing raw read. */
382 /* Read the compressed data into compressed_buf. */
383 ret = full_pread(in_fd,
385 compressed_chunk_size,
390 /* For partial chunks and when writing directly to a
391 * buffer, we must buffer the uncompressed data because
392 * we don't need all of it. */
393 if (partial_chunk_size != uncompressed_chunk_size &&
396 u8 uncompressed_buf[uncompressed_chunk_size];
398 ret = (*decompress)(compressed_buf,
399 compressed_chunk_size,
401 uncompressed_chunk_size);
403 ret = WIMLIB_ERR_DECOMPRESSION;
405 goto out_free_chunk_offsets;
407 memcpy(out_p, uncompressed_buf + start_offset,
410 ret = (*decompress)(compressed_buf,
411 compressed_chunk_size,
413 uncompressed_chunk_size);
415 ret = WIMLIB_ERR_DECOMPRESSION;
417 goto out_free_chunk_offsets;
422 /* Feed the data to the callback function. */
423 ret = cb(out_p + start_offset,
424 partial_chunk_size, ctx_or_buf);
426 goto out_free_chunk_offsets;
428 /* No callback function provided; we are writing
429 * directly to a buffer. Advance the pointer into this
430 * buffer by the number of uncompressed bytes that were
432 out_p += partial_chunk_size;
434 cur_read_offset += compressed_chunk_size;
438 out_free_chunk_offsets:
439 if (chunk_offsets_malloced)
444 ERROR_WITH_ERRNO("Error reading compressed file resource");
445 goto out_free_chunk_offsets;
448 /* Skip over the chunk table at the end of pipable, compressed resource being
449 * read from a pipe. */
451 skip_chunk_table(const struct wim_lookup_table_entry *lte,
452 struct filedes *in_fd)
454 u64 num_chunk_entries = wim_resource_chunks(lte) - 1;
455 u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
456 u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
459 if (num_chunk_entries != 0) {
461 ret = full_pread(in_fd, &dummy, 1,
462 in_fd->offset + chunk_table_size - 1);
469 /* Read and decompress data from a compressed, pipable resource being read from
472 read_pipable_resource(const struct wim_lookup_table_entry *lte,
473 u64 size, consume_data_callback_t cb,
474 void *ctx_or_buf, int flags, u64 offset)
476 struct filedes *in_fd;
477 decompress_func_t decompress;
479 u8 chunk[WIM_CHUNK_SIZE];
480 u8 cchunk[WIM_CHUNK_SIZE - 1];
482 /* Get pointers to appropriate decompression function and the input file
484 decompress = get_decompress_func(wim_resource_compression_type(lte));
485 in_fd = <e->wim->in_fd;
487 /* This function currently assumes the entire resource is being read at
488 * once and that the raw compressed data isn't being requested. This is
489 * based on the fact that this function currently only gets called
490 * during the operation of wimlib_extract_image_from_pipe(). */
491 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW));
492 wimlib_assert(offset == 0);
493 wimlib_assert(size == wim_resource_size(lte));
494 wimlib_assert(in_fd->offset == lte->resource_entry.offset);
496 for (offset = 0; offset < size; offset += WIM_CHUNK_SIZE) {
497 struct pwm_chunk_hdr chunk_hdr;
503 /* Calculate uncompressed size of next chunk. */
504 chunk_size = min(WIM_CHUNK_SIZE, size - offset);
506 /* Read the compressed size of the next chunk from the chunk
508 ret = full_read(in_fd, &chunk_hdr, sizeof(chunk_hdr));
512 cchunk_size = le32_to_cpu(chunk_hdr.compressed_size);
514 if (cchunk_size > WIM_CHUNK_SIZE) {
516 ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
520 /* Read chunk data. */
521 ret = full_read(in_fd, cchunk, cchunk_size);
525 if (flags & WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY)
528 /* Decompress chunk if needed. Uncompressed size same
529 * as compressed size means the chunk is uncompressed.
531 res_chunk_size = chunk_size;
532 if (cchunk_size == chunk_size) {
535 ret = (*decompress)(cchunk, cchunk_size,
539 ret = WIMLIB_ERR_DECOMPRESSION;
545 /* Feed the uncompressed data into the callback function or copy
546 * it into the provided buffer. */
548 ret = cb(res_chunk, res_chunk_size, ctx_or_buf);
552 ctx_or_buf = mempcpy(ctx_or_buf, res_chunk,
557 ret = skip_chunk_table(lte, in_fd);
563 ERROR_WITH_ERRNO("Error reading compressed file resource");
567 ERROR("Compressed file resource is invalid");
572 * read_partial_wim_resource()-
574 * Read a range of data from a uncompressed or compressed resource in a WIM
575 * file. Data is written into a buffer or fed into a callback function, as
576 * documented in read_resource_prefix().
581 * Just do a normal read, decompressing the data if necessary. @size and
582 * @offset are interpreted relative to the uncompressed contents of the
585 * WIMLIB_READ_RESOURCE_FLAG_RAW_FULL:
586 * Only valid when the resource is compressed: Read the raw contents of
587 * the compressed resource. If the resource is non-pipable, this includes
588 * the chunk table as well as the compressed chunks. If the resource is
589 * pipable, this includes the compressed chunks--- including the chunk
590 * headers--- and the chunk table. The stream header is still *not*
593 * In this mode, @offset is relative to the beginning of the raw contents
594 * of the compressed resource--- that is, the chunk table if the resource
595 * is non-pipable, or the header for the first compressed chunk if the
596 * resource is pipable. @size is the number of raw bytes to read, which
597 * must not overrun the end of the resource. For example, if @offset is 0,
598 * then @size can be at most the raw size of the compressed resource
599 * (@lte->resource_entry.size).
601 * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS:
602 * Only valid when the resource is compressed and is not being read from a
603 * pipe: Read the raw contents of the compressed chunks of the compressed
604 * resource. For pipable resources, this does *not* include the chunk
605 * headers. If a callback function is being used, it will be called once
606 * for each compressed chunk. The chunk table is excluded. Also, for
607 * pipable resources, the stream and chunk headers are excluded. In this
608 * mode, @size must be exactly the raw size of the compressed resource
609 * (@lte->resource_entry.size) and @offset must be 0.
611 * WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY:
612 * Only valid when the resource is being read from a pipe: Skip over the
613 * requested data rather than feed it to the callback function or write it
614 * into the buffer. No decompression is done.
615 * WIMLIB_READ_RESOURCE_FLAG_RAW_* may not be combined with this flag.
616 * @offset must be 0 and @size must be the uncompressed size of the
620 * WIMLIB_ERR_SUCCESS (0)
621 * WIMLIB_ERR_READ (errno set)
622 * WIMLIB_ERR_NOMEM (errno set to ENOMEM)
623 * WIMLIB_ERR_DECOMPRESSION (errno set to EINVAL)
624 * WIMLIB_ERR_INVALID_PIPABLE_WIM (errno set to EINVAL)
626 * or other error code returned by the @cb function.
629 read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
630 u64 size, consume_data_callback_t cb,
631 void *ctx_or_buf, int flags, u64 offset)
633 struct filedes *in_fd;
636 /* Make sure the resource is actually located in a WIM file and is not
638 wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
640 /* Retrieve input file descriptor for the WIM file. */
641 in_fd = <e->wim->in_fd;
643 /* Don't allow raw reads (either full or chunks) of uncompressed
645 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW) ||
646 resource_is_compressed(<e->resource_entry));
648 /* Don't allow seek-only reads unless reading from a pipe; also don't
649 * allow combining SEEK_ONLY with either RAW flag. */
650 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY) ||
651 (!filedes_is_seekable(in_fd) &&
652 !(flags & WIMLIB_READ_RESOURCE_FLAG_RAW)));
654 DEBUG("Reading WIM resource: %"PRIu64" @ +%"PRIu64" "
655 "from %"PRIu64" @ +%"PRIu64" (readflags 0x%08x, resflags 0x%02x%s)",
657 lte->resource_entry.original_size, lte->resource_entry.offset,
658 flags, lte->resource_entry.flags,
659 (lte->is_pipable ? ", pipable" : ""));
661 if ((flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) ||
662 !resource_is_compressed(<e->resource_entry))
664 /* Reading raw resource contents or reading uncompressed
666 wimlib_assert(offset + size <= lte->resource_entry.size);
667 offset += lte->resource_entry.offset;
668 if (flags & WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY) {
669 if (lte->resource_entry.size != 0) {
671 ret = full_pread(in_fd, &dummy, 1,
672 offset + lte->resource_entry.size - 1);
677 /* Send data to callback function */
678 u8 buf[min(WIM_CHUNK_SIZE, size)];
680 size_t bytes_to_read = min(WIM_CHUNK_SIZE,
682 ret = full_pread(in_fd, buf, bytes_to_read,
686 ret = cb(buf, bytes_to_read, ctx_or_buf);
689 size -= bytes_to_read;
690 offset += bytes_to_read;
693 /* Send data directly to a buffer */
694 ret = full_pread(in_fd, ctx_or_buf, size, offset);
699 } else if (lte->is_pipable && !filedes_is_seekable(in_fd)) {
700 /* Reading compressed, pipable resource from pipe. */
701 ret = read_pipable_resource(lte, size, cb,
702 ctx_or_buf, flags, offset);
704 /* Reading compressed, possibly pipable resource from seekable
706 ret = read_compressed_resource(lte, size, cb,
707 ctx_or_buf, flags, offset);
712 ERROR_WITH_ERRNO("Error reading data from WIM");
719 read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
720 size_t size, u64 offset, void *buf)
722 return read_partial_wim_resource(lte, size, NULL, buf, 0, offset);
726 read_wim_resource_prefix(const struct wim_lookup_table_entry *lte,
728 consume_data_callback_t cb,
732 return read_partial_wim_resource(lte, size, cb, ctx_or_buf, flags, 0);
738 read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
740 consume_data_callback_t cb,
744 const tchar *filename = lte->file_on_disk;
749 DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"",
750 size, lte->file_on_disk);
752 raw_fd = open(filename, O_RDONLY);
754 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", filename);
755 return WIMLIB_ERR_OPEN;
757 filedes_init(&fd, raw_fd);
759 /* Send data to callback function */
760 u8 buf[min(WIM_CHUNK_SIZE, size)];
761 size_t bytes_to_read;
763 bytes_to_read = min(WIM_CHUNK_SIZE, size);
764 ret = full_read(&fd, buf, bytes_to_read);
767 ret = cb(buf, bytes_to_read, ctx_or_buf);
770 size -= bytes_to_read;
773 /* Send data directly to a buffer */
774 ret = full_read(&fd, ctx_or_buf, size);
782 ERROR_WITH_ERRNO("Error reading \"%"TS"\"", filename);
787 #endif /* !__WIN32__ */
790 read_buffer_prefix(const struct wim_lookup_table_entry *lte,
791 u64 size, consume_data_callback_t cb,
792 void *ctx_or_buf, int _ignored_flags)
794 const void *inbuf = lte->attached_buffer;
799 size_t chunk_size = min(WIM_CHUNK_SIZE, size);
800 ret = cb(inbuf, chunk_size, ctx_or_buf);
807 memcpy(ctx_or_buf, inbuf, size);
812 typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
814 consume_data_callback_t cb,
819 * read_resource_prefix()-
821 * Read the first @size bytes from a generic "resource", which may be located in
822 * the WIM (compressed or uncompressed), in an external file, or directly in an
825 * Feed the data either to a callback function (cb != NULL, passing it
826 * ctx_or_buf), or write it directly into a buffer (cb == NULL, ctx_or_buf
827 * specifies the buffer, which must have room for @size bytes).
829 * When using a callback function, it is called with chunks up to 32768 bytes in
830 * size until the resource is exhausted.
832 * If the resource is located in a WIM file, @flags can be set as documented in
833 * read_partial_wim_resource(). Otherwise @flags are ignored.
836 read_resource_prefix(const struct wim_lookup_table_entry *lte,
837 u64 size, consume_data_callback_t cb, void *ctx_or_buf,
840 static const read_resource_prefix_handler_t handlers[] = {
841 [RESOURCE_IN_WIM] = read_wim_resource_prefix,
843 [RESOURCE_IN_FILE_ON_DISK] = read_win32_file_prefix,
845 [RESOURCE_IN_FILE_ON_DISK] = read_file_on_disk_prefix,
847 [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix,
849 [RESOURCE_IN_STAGING_FILE] = read_file_on_disk_prefix,
852 [RESOURCE_IN_NTFS_VOLUME] = read_ntfs_file_prefix,
855 [RESOURCE_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix,
858 wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
859 && handlers[lte->resource_location] != NULL);
860 return handlers[lte->resource_location](lte, size, cb, ctx_or_buf, flags);
/* Read the entire resource (wim_resource_size(lte) bytes) into @buf, with no
 * callback and no flags.  Returns whatever read_resource_prefix() returns.  */
int
read_full_resource_into_buf(const struct wim_lookup_table_entry *lte,
			    void *buf)
{
	return read_resource_prefix(lte, wim_resource_size(lte), NULL, buf, 0);
}
871 read_full_resource_into_alloc_buf(const struct wim_lookup_table_entry *lte,
877 if ((size_t)lte->resource_entry.original_size !=
878 lte->resource_entry.original_size)
880 ERROR("Can't read %"PRIu64" byte resource into "
881 "memory", lte->resource_entry.original_size);
882 return WIMLIB_ERR_NOMEM;
885 buf = MALLOC(lte->resource_entry.original_size);
887 return WIMLIB_ERR_NOMEM;
889 ret = read_full_resource_into_buf(lte, buf);
900 res_entry_to_data(const struct resource_entry *res_entry,
901 WIMStruct *wim, void **buf_ret)
904 struct wim_lookup_table_entry *lte;
906 lte = new_lookup_table_entry();
908 return WIMLIB_ERR_NOMEM;
910 copy_resource_entry(<e->resource_entry, res_entry);
912 lte->part_number = wim->hdr.part_number;
913 lte_init_wim(lte, wim);
915 ret = read_full_resource_into_alloc_buf(lte, buf_ret);
916 free_lookup_table_entry(lte);
922 consume_data_callback_t extract_chunk;
923 void *extract_chunk_arg;
927 extract_chunk_sha1_wrapper(const void *chunk, size_t chunk_size,
930 struct extract_ctx *ctx = _ctx;
932 sha1_update(&ctx->sha_ctx, chunk, chunk_size);
933 return ctx->extract_chunk(chunk, chunk_size, ctx->extract_chunk_arg);
936 /* Extracts the first @size bytes of a WIM resource to somewhere. In the
937 * process, the SHA1 message digest of the resource is checked if the full
938 * resource is being extracted.
940 * @extract_chunk is a function that is called to extract each chunk of the
943 extract_wim_resource(const struct wim_lookup_table_entry *lte,
945 consume_data_callback_t extract_chunk,
946 void *extract_chunk_arg)
949 if (size == wim_resource_size(lte)) {
951 struct extract_ctx ctx;
952 ctx.extract_chunk = extract_chunk;
953 ctx.extract_chunk_arg = extract_chunk_arg;
954 sha1_init(&ctx.sha_ctx);
955 ret = read_resource_prefix(lte, size,
956 extract_chunk_sha1_wrapper,
959 u8 hash[SHA1_HASH_SIZE];
960 sha1_final(hash, &ctx.sha_ctx);
961 if (!hashes_equal(hash, lte->hash)) {
962 if (wimlib_print_errors) {
963 ERROR("Invalid SHA1 message digest "
964 "on the following WIM resource:");
965 print_lookup_table_entry(lte, stderr);
966 if (lte->resource_location == RESOURCE_IN_WIM)
967 ERROR("The WIM file appears to be corrupt!");
969 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
974 ret = read_resource_prefix(lte, size, extract_chunk,
975 extract_chunk_arg, 0);
/* Chunk callback that writes @len bytes from @buf to the file descriptor
 * pointed to by @_fd_p (a struct filedes).  Logs an error and returns the
 * full_write() error code on failure; returns 0 on success.  */
static int
extract_wim_chunk_to_fd(const void *buf, size_t len, void *_fd_p)
{
	struct filedes *fd = _fd_p;
	int ret = full_write(fd, buf, len);
	if (ret)
		ERROR_WITH_ERRNO("Error writing to file descriptor");
	return ret;
}
991 extract_wim_resource_to_fd(const struct wim_lookup_table_entry *lte,
992 struct filedes *fd, u64 size)
994 return extract_wim_resource(lte, size, extract_wim_chunk_to_fd, fd);
/* Chunk callback that folds @len bytes at @buf into the SHA1 state passed as
 * @ctx.  Always returns 0.  */
static int
sha1_chunk(const void *buf, size_t len, void *ctx)
{
	sha1_update(ctx, buf, len);
	return 0;
}
1005 /* Calculate the SHA1 message digest of a stream. */
1007 sha1_resource(struct wim_lookup_table_entry *lte)
1012 sha1_init(&sha_ctx);
1013 ret = read_resource_prefix(lte, wim_resource_size(lte),
1014 sha1_chunk, &sha_ctx, 0);
1016 sha1_final(lte->hash, &sha_ctx);
1020 /* Translates a WIM resource entry from the on-disk format to an in-memory
1023 get_resource_entry(const struct resource_entry_disk *disk_entry,
1024 struct resource_entry *entry)
1026 /* Note: disk_entry may not be 8 byte aligned--- in that case, the
1027 * offset and original_size members will be unaligned. (This should be
1028 * okay since `struct resource_entry_disk' is declared as packed.) */
1030 /* Read the size and flags into a bitfield portably... */
1031 entry->size = (((u64)disk_entry->size[0] << 0) |
1032 ((u64)disk_entry->size[1] << 8) |
1033 ((u64)disk_entry->size[2] << 16) |
1034 ((u64)disk_entry->size[3] << 24) |
1035 ((u64)disk_entry->size[4] << 32) |
1036 ((u64)disk_entry->size[5] << 40) |
1037 ((u64)disk_entry->size[6] << 48));
1038 entry->flags = disk_entry->flags;
1039 entry->offset = le64_to_cpu(disk_entry->offset);
1040 entry->original_size = le64_to_cpu(disk_entry->original_size);
1042 /* offset and original_size are truncated to 62 bits to avoid possible
1043 * overflows, when converting to a signed 64-bit integer (off_t) or when
1044 * adding size or original_size. This is okay since no one would ever
1045 * actually have a WIM bigger than 4611686018427387903 bytes... */
1046 if (entry->offset & 0xc000000000000000ULL) {
1047 WARNING("Truncating offset in resource entry");
1048 entry->offset &= 0x3fffffffffffffffULL;
1050 if (entry->original_size & 0xc000000000000000ULL) {
1051 WARNING("Truncating original_size in resource entry");
1052 entry->original_size &= 0x3fffffffffffffffULL;
1056 /* Translates a WIM resource entry from an in-memory format into the on-disk
1059 put_resource_entry(const struct resource_entry *entry,
1060 struct resource_entry_disk *disk_entry)
1062 /* Note: disk_entry may not be 8 byte aligned--- in that case, the
1063 * offset and original_size members will be unaligned. (This should be
1064 * okay since `struct resource_entry_disk' is declared as packed.) */
1065 u64 size = entry->size;
1067 disk_entry->size[0] = size >> 0;
1068 disk_entry->size[1] = size >> 8;
1069 disk_entry->size[2] = size >> 16;
1070 disk_entry->size[3] = size >> 24;
1071 disk_entry->size[4] = size >> 32;
1072 disk_entry->size[5] = size >> 40;
1073 disk_entry->size[6] = size >> 48;
1074 disk_entry->flags = entry->flags;
1075 disk_entry->offset = cpu_to_le64(entry->offset);
1076 disk_entry->original_size = cpu_to_le64(entry->original_size);