4 * Read uncompressed and compressed metadata and file resources from a WIM file.
8 * Copyright (C) 2012, 2013 Eric Biggers
10 * This file is part of wimlib, a library for working with WIM files.
12 * wimlib is free software; you can redistribute it and/or modify it under the
13 * terms of the GNU General Public License as published by the Free Software
14 * Foundation; either version 3 of the License, or (at your option) any later
17 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License along with
22 * wimlib; if not, see http://www.gnu.org/licenses/.
30 #include "wimlib/dentry.h"
31 #include "wimlib/endianness.h"
32 #include "wimlib/error.h"
33 #include "wimlib/file_io.h"
34 #include "wimlib/lookup_table.h"
35 #include "wimlib/resource.h"
36 #include "wimlib/sha1.h"
39 /* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */
40 # include "wimlib/win32.h"
44 /* for read_ntfs_file_prefix() */
45 # include "wimlib/ntfs_3g.h"
58 * Compressed resources
60 * A compressed resource in a WIM consists of a number of consecutive LZX or
61 * XPRESS-compressed chunks, each of which decompresses to 32768 bytes of data,
62 * except possibly the last, which always decompresses to any remaining bytes.
63 * In addition, immediately before the chunks, a table (the "chunk table")
64 * provides the offset, in bytes relative to the end of the chunk table, of the
65 * start of each compressed chunk, except for the first chunk which is omitted
66 * as it always has an offset of 0. Therefore, a compressed resource with N
67 * chunks will have a chunk table with N - 1 entries.
69 * Additional information:
71 * - Entries in the chunk table are 4 bytes each, except if the uncompressed
72 * size of the resource is greater than 4 GiB, in which case the entries in
73 * the chunk table are 8 bytes each. In either case, the entries are unsigned
74 * little-endian integers.
76 * - The chunk table is included in the compressed size of the resource provided
77 * in the corresponding entry in the WIM's stream lookup table.
79 * - The compressed size of a chunk is never greater than the uncompressed size.
80 * From the compressor's point of view, chunks that would have compressed to a
81 * size greater than or equal to their original size are in fact stored
82 * uncompressed. From the decompresser's point of view, chunks with
83 * compressed size equal to their uncompressed size are in fact uncompressed.
85 * Furthermore, wimlib supports its own "pipable" WIM format, and for this the
86 * structure of compressed resources was modified to allow piped reading and
87 * writing. To make sequential writing possible, the chunk table is placed
88 * after the chunks rather than before the chunks, and to make sequential
89 * reading possible, each chunk is prefixed with a 4-byte header giving its
90 * compressed size as a 32-bit, unsigned, little-endian integer (less than or
91 * equal to 32768). Otherwise the details are the same.
// Common signature of the chunk decompressors selected by get_decompress_func().
// As called in this file the arguments are: compressed data, compressed size,
// output buffer, uncompressed size.
// NOTE(review): callers appear to treat a nonzero result as failure -- confirm.
94 typedef int (*decompress_func_t)(const void *, unsigned, void *, unsigned);
// Map a WIM compression type to its chunk decompression routine.
// WIMLIB_COMPRESSION_TYPE_LZX selects wimlib_lzx_decompress; the remaining
// return statement yields wimlib_xpress_decompress.
96 static decompress_func_t
97 get_decompress_func(int ctype)
99 if (ctype == WIMLIB_COMPRESSION_TYPE_LZX)
100 return wimlib_lzx_decompress;
102 return wimlib_xpress_decompress;
106 * read_compressed_resource()-
108 * Read data from a compressed resource being read from a seekable WIM file.
109 * The resource may be either pipable or non-pipable.
114 * Just do a normal read, decompressing the data if necessary.
116 * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS:
117 * Read the raw contents of the compressed chunks of the compressed
118 * resource. For pipable resources, this does *not* include the chunk
119 * headers. If a callback function is being used, it will be called once
120 * for each compressed chunk. For non-pipable resources, this mode
121 * excludes the chunk table. For pipable resources, this mode excludes the
122 * stream and chunk headers.
// Read @size bytes, starting at uncompressed @offset, from a compressed
// resource stored in a seekable WIM file.  Data goes to the callback @cb when
// one is supplied, otherwise into the buffer at @ctx_or_buf.  Only the slice
// of the chunk table needed for the requested range is loaded.
// Error codes visible below: WIMLIB_ERR_NOMEM, WIMLIB_ERR_DECOMPRESSION, and
// whatever full_pread() or @cb hand back.
125 read_compressed_resource(const struct wim_lookup_table_entry *lte,
126 u64 size, consume_data_callback_t cb,
127 void *ctx_or_buf, int flags, u64 offset)
131 /* Currently, reading raw compressed chunks is only guaranteed to work
132 * correctly when the full resource is requested. Furthermore, in such
133 * cases the requested size is specified as the compressed size, but
134 * here we change it to an uncompressed size to avoid confusing the rest
135 * of this function. */
136 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
137 wimlib_assert(offset == 0);
138 wimlib_assert(size == lte->resource_entry.size);
139 size = wim_resource_size(lte);
142 wimlib_assert(offset + size <= wim_resource_size(lte));
144 /* Handle the trivial case. */
148 /* Get the appropriate decompression function. */
149 decompress_func_t decompress =
150 get_decompress_func(wim_resource_compression_type(lte));
152 /* Get the file descriptor for the WIM. */
// Address of the filedes member embedded in the owning WIM structure.
153 struct filedes *in_fd = &lte->wim->in_fd;
155 /* Calculate the number of chunks the resource is divided into. */
156 u64 num_chunks = wim_resource_chunks(lte);
158 /* Calculate the number of entries in the chunk table; it's one less
159 * than the number of chunks, since the first chunk has no entry. */
160 u64 num_chunk_entries = num_chunks - 1;
162 /* Calculate the 0-based index of the chunk at which the read starts.
164 u64 start_chunk = offset / WIM_CHUNK_SIZE;
166 /* Calculate the offset, within the start chunk, of the first byte of
168 u64 start_offset_in_chunk = offset % WIM_CHUNK_SIZE;
170 /* Calculate the index of the chunk that contains the last byte of the
172 u64 end_chunk = (offset + size - 1) / WIM_CHUNK_SIZE;
174 /* Calculate the offset, within the end chunk, of the last byte of the
176 u64 end_offset_in_chunk = (offset + size - 1) % WIM_CHUNK_SIZE;
178 /* Calculate the number of chunk entries that are actually needed to read the
179 * requested part of the resource. Include an entry for the first chunk
180 * even though that doesn't exist in the on-disk table, but take into
181 * account that if the last chunk required for the read is not the last
182 * chunk of the resource, an extra chunk entry is needed so that the
183 * compressed size of the last chunk of the read can be determined. */
184 u64 num_alloc_chunk_entries = end_chunk - start_chunk + 1;
185 if (end_chunk != num_chunks - 1)
186 num_alloc_chunk_entries++;
188 /* Set the size of each chunk table entry based on the resource's
189 * uncompressed size. */
190 u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
192 /* Calculate the size, in bytes, of the full chunk table. */
193 u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
195 /* Allocate a buffer to hold a subset of the chunk table. It will only
196 * contain offsets for the chunks that are actually needed for this
197 * read. For speed, allocate the buffer on the stack unless it's too
200 bool chunk_offsets_malloced;
201 if (num_alloc_chunk_entries < 1024) {
202 chunk_offsets = alloca(num_alloc_chunk_entries * sizeof(u64));
203 chunk_offsets_malloced = false;
205 chunk_offsets = malloc(num_alloc_chunk_entries * sizeof(u64));
206 if (!chunk_offsets) {
207 ERROR("Failed to allocate chunk table "
208 "with %"PRIu64" entries", num_alloc_chunk_entries);
209 return WIMLIB_ERR_NOMEM;
211 chunk_offsets_malloced = true;
214 /* Set the implicit offset of the first chunk if it's included in the
216 if (start_chunk == 0)
217 chunk_offsets[0] = 0;
219 /* Calculate the index of the first needed entry in the chunk table. */
220 u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
222 /* Calculate the number of entries that need to be read from the chunk
224 u64 num_needed_chunk_entries = (start_chunk == 0) ?
225 num_alloc_chunk_entries - 1 : num_alloc_chunk_entries;
227 /* Calculate the number of bytes of data that need to be read from the
// The product is computed in 64 bits and compared with its size_t value so
// that truncation on a narrower size_t is reported as WIMLIB_ERR_NOMEM.
229 size_t chunk_table_needed_size =
230 num_needed_chunk_entries * chunk_entry_size;
231 if ((u64)chunk_table_needed_size !=
232 num_needed_chunk_entries * chunk_entry_size)
234 ERROR("Compressed read request too large to fit into memory!");
235 ret = WIMLIB_ERR_NOMEM;
236 goto out_free_chunk_offsets;
239 /* Calculate the byte offset, in the WIM file, of the first chunk table
240 * entry to read. Take into account that if the WIM file is in the
241 * special "pipable" format, then the chunk table is at the end of the
242 * resource, not the beginning. */
243 u64 file_offset_of_needed_chunk_entries =
244 lte->resource_entry.offset + (start_table_idx *
247 file_offset_of_needed_chunk_entries += lte->resource_entry.size -
// The raw on-disk entries are placed at the tail of chunk_offsets so they can
// be widened in place, front to back, without overwriting unread entries.
250 /* Read the needed chunk table entries into the end of the chunk_offsets
252 void *chunk_tab_data = (u8*)&chunk_offsets[num_alloc_chunk_entries] -
253 chunk_table_needed_size;
254 ret = full_pread(in_fd, chunk_tab_data, chunk_table_needed_size,
255 file_offset_of_needed_chunk_entries);
259 /* Now fill in chunk_offsets from the entries we have read in
260 * chunk_tab_data. Careful: chunk_offsets aliases chunk_tab_data, which
261 * breaks C's aliasing rules when we read 32-bit integers and store
262 * 64-bit integers. But since the operations are safe as long as the
263 * compiler doesn't mess with their order, we use the gcc may_alias
264 * extension to tell the compiler that loads from the 32-bit integers
265 * may alias stores to the 64-bit integers. */
267 typedef le64 __attribute__((may_alias)) aliased_le64_t;
268 typedef le32 __attribute__((may_alias)) aliased_le32_t;
269 u64 *chunk_offsets_p = chunk_offsets;
272 if (start_chunk == 0)
275 if (chunk_entry_size == 4) {
276 aliased_le32_t *raw_entries = (aliased_le32_t*)chunk_tab_data;
277 for (i = 0; i < num_needed_chunk_entries; i++)
278 chunk_offsets_p[i] = le32_to_cpu(raw_entries[i]);
280 aliased_le64_t *raw_entries = (aliased_le64_t*)chunk_tab_data;
281 for (i = 0; i < num_needed_chunk_entries; i++)
282 chunk_offsets_p[i] = le64_to_cpu(raw_entries[i]);
286 /* Calculate file offset of the first chunk that needs to be read. N.B.
287 * if the resource is pipable, the entries in the chunk table do *not*
288 * include the chunk headers. */
289 u64 cur_read_offset = lte->resource_entry.offset + chunk_offsets[0];
290 if (!lte->is_pipable)
291 cur_read_offset += chunk_table_size;
293 cur_read_offset += start_chunk *
294 sizeof(struct pwm_chunk_hdr);
296 /* If using a callback function, allocate a temporary buffer that will
297 * be used to pass data to it. If writing directly to a buffer instead,
298 * arrange to write data directly into it. */
301 out_p = alloca(WIM_CHUNK_SIZE);
// NOTE(review): compressed_chunk_size below comes from on-disk chunk table
// entries; no bound check against this WIM_CHUNK_SIZE - 1 byte buffer is
// visible in this listing -- confirm one exists before the read.
305 /* Unless the raw compressed data was requested, allocate a temporary
306 * buffer for reading compressed chunks, each of which can be at most
307 * WIM_CHUNK_SIZE - 1 bytes. This excludes compressed chunks that are a
308 * full WIM_CHUNK_SIZE bytes, which are handled separately. */
309 void *compressed_buf;
310 if (!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS))
311 compressed_buf = alloca(WIM_CHUNK_SIZE - 1);
313 /* Read, and possibly decompress, each needed chunk, either writing the
314 * data directly into the @ctx_or_buf buffer or passing it to the @cb
315 * callback function. */
316 for (u64 i = start_chunk; i <= end_chunk; i++) {
318 /* If the resource is pipable, skip the chunk header. */
320 cur_read_offset += sizeof(struct pwm_chunk_hdr);
322 /* Calculate the sizes of the compressed chunk and of the
323 * uncompressed chunk. */
324 unsigned compressed_chunk_size;
325 unsigned uncompressed_chunk_size;
326 if (i != num_chunks - 1) {
327 /* Not the last chunk. Compressed size is given by
328 * difference of chunk table entries; uncompressed size
329 * is always 32768 bytes. */
330 compressed_chunk_size = chunk_offsets[i + 1 - start_chunk] -
331 chunk_offsets[i - start_chunk];
332 uncompressed_chunk_size = WIM_CHUNK_SIZE;
334 /* Last chunk. Compressed size is the remaining size in
335 * the compressed resource; uncompressed size is the
336 * remaining size in the uncompressed resource. */
337 compressed_chunk_size = lte->resource_entry.size -
339 chunk_offsets[i - start_chunk];
341 compressed_chunk_size -= num_chunks *
342 sizeof(struct pwm_chunk_hdr);
344 if (wim_resource_size(lte) % WIM_CHUNK_SIZE == 0)
345 uncompressed_chunk_size = WIM_CHUNK_SIZE;
347 uncompressed_chunk_size = wim_resource_size(lte) %
351 /* Calculate how much of this chunk needs to be read. */
353 unsigned partial_chunk_size;
354 u64 start_offset = 0;
355 u64 end_offset = WIM_CHUNK_SIZE - 1;
357 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
358 partial_chunk_size = compressed_chunk_size;
360 if (i == start_chunk)
361 start_offset = start_offset_in_chunk;
364 end_offset = end_offset_in_chunk;
366 partial_chunk_size = end_offset + 1 - start_offset;
369 if (compressed_chunk_size == uncompressed_chunk_size ||
370 (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS))
372 /* Chunk stored uncompressed, or reading raw chunk data. */
373 ret = full_pread(in_fd,
374 cb ? out_p + start_offset : out_p,
376 cur_read_offset + start_offset);
380 /* Compressed chunk and not doing raw read. */
382 /* Read the compressed data into compressed_buf. */
383 ret = full_pread(in_fd,
385 compressed_chunk_size,
390 /* For partial chunks and when writing directly to a
391 * buffer, we must buffer the uncompressed data because
392 * we don't need all of it. */
393 if (partial_chunk_size != uncompressed_chunk_size &&
396 u8 uncompressed_buf[uncompressed_chunk_size];
398 ret = (*decompress)(compressed_buf,
399 compressed_chunk_size,
401 uncompressed_chunk_size);
403 ERROR("Failed to decompress data.");
404 ret = WIMLIB_ERR_DECOMPRESSION;
406 goto out_free_chunk_offsets;
408 memcpy(out_p, uncompressed_buf + start_offset,
411 ret = (*decompress)(compressed_buf,
412 compressed_chunk_size,
414 uncompressed_chunk_size);
416 ERROR("Failed to decompress data.");
417 ret = WIMLIB_ERR_DECOMPRESSION;
419 goto out_free_chunk_offsets;
424 /* Feed the data to the callback function. */
425 ret = cb(out_p + start_offset,
426 partial_chunk_size, ctx_or_buf);
428 goto out_free_chunk_offsets;
430 /* No callback function provided; we are writing
431 * directly to a buffer. Advance the pointer into this
432 * buffer by the number of uncompressed bytes that were
434 out_p += partial_chunk_size;
436 cur_read_offset += compressed_chunk_size;
// Common exit: chunk_offsets_malloced records whether the offsets array came
// from malloc() rather than alloca().
440 out_free_chunk_offsets:
441 if (chunk_offsets_malloced)
446 ERROR_WITH_ERRNO("Error reading compressed file resource");
447 goto out_free_chunk_offsets;
450 /* Skip over the chunk table at the end of a pipable, compressed resource being
451 * read from a pipe. */
// Move the read position of @in_fd past the chunk table of @lte.
// The table holds (number of chunks - 1) entries, each 4 bytes, or 8 bytes
// when the uncompressed resource size exceeds 2^32.  When the table is not
// empty, a single byte is read at the table's final byte, addressed relative
// to in_fd->offset.
// NOTE(review): presumably full_pread() on a pipe consumes the intervening
// bytes, which is what makes this a skip -- confirm against file_io.
453 skip_chunk_table(const struct wim_lookup_table_entry *lte,
454 struct filedes *in_fd)
456 u64 num_chunk_entries = wim_resource_chunks(lte) - 1;
457 u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
458 u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
461 if (num_chunk_entries != 0) {
463 ret = full_pread(in_fd, &dummy, 1,
464 in_fd->offset + chunk_table_size - 1);
471 /* Read and decompress data from a compressed, pipable resource being read from
// Sequentially read a whole compressed, pipable resource from a non-seekable
// input.  Each chunk is preceded by a struct pwm_chunk_hdr carrying its
// compressed size.  Chunks are decompressed unless stored uncompressed, then
// passed to @cb or appended to the buffer at @ctx_or_buf.  With
// WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY the chunk data is read and discarded.
// After the last chunk the trailing chunk table is skipped.
// Error codes visible below: WIMLIB_ERR_INVALID_PIPABLE_WIM,
// WIMLIB_ERR_DECOMPRESSION, and whatever full_read() or @cb hand back.
474 read_pipable_resource(const struct wim_lookup_table_entry *lte,
475 u64 size, consume_data_callback_t cb,
476 void *ctx_or_buf, int flags, u64 offset)
478 struct filedes *in_fd;
479 decompress_func_t decompress;
481 u8 chunk[WIM_CHUNK_SIZE];
// Sized for a full chunk: the header check below admits a compressed size
// equal to WIM_CHUNK_SIZE (a chunk stored uncompressed), and that many bytes
// are then read into this buffer.
482 u8 cchunk[WIM_CHUNK_SIZE];
484 /* Get pointers to appropriate decompression function and the input file
486 decompress = get_decompress_func(wim_resource_compression_type(lte));
487 in_fd = &lte->wim->in_fd;
489 /* This function currently assumes the entire resource is being read at
490 * once and that the raw compressed data isn't being requested. This is
491 * based on the fact that this function currently only gets called
492 * during the operation of wimlib_extract_image_from_pipe(). */
493 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW));
494 wimlib_assert(offset == 0);
495 wimlib_assert(size == wim_resource_size(lte));
496 wimlib_assert(in_fd->offset == lte->resource_entry.offset);
498 for (offset = 0; offset < size; offset += WIM_CHUNK_SIZE) {
499 struct pwm_chunk_hdr chunk_hdr;
505 /* Calculate uncompressed size of next chunk. */
506 chunk_size = min(WIM_CHUNK_SIZE, size - offset);
508 /* Read the compressed size of the next chunk from the chunk
510 ret = full_read(in_fd, &chunk_hdr, sizeof(chunk_hdr));
514 cchunk_size = le32_to_cpu(chunk_hdr.compressed_size);
// Reject headers claiming more than one full chunk; this also bounds the
// read into cchunk that follows.
516 if (cchunk_size > WIM_CHUNK_SIZE) {
518 ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
522 /* Read chunk data. */
523 ret = full_read(in_fd, cchunk, cchunk_size);
527 if (flags & WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY)
530 /* Decompress chunk if needed. Uncompressed size same
531 * as compressed size means the chunk is uncompressed.
533 res_chunk_size = chunk_size;
534 if (cchunk_size == chunk_size) {
537 ret = (*decompress)(cchunk, cchunk_size,
541 ret = WIMLIB_ERR_DECOMPRESSION;
547 /* Feed the uncompressed data into the callback function or copy
548 * it into the provided buffer. */
550 ret = cb(res_chunk, res_chunk_size, ctx_or_buf);
554 ctx_or_buf = mempcpy(ctx_or_buf, res_chunk,
559 ret = skip_chunk_table(lte, in_fd);
565 ERROR_WITH_ERRNO("Error reading compressed file resource");
569 ERROR("Compressed file resource is invalid");
574 * read_partial_wim_resource()-
576 * Read a range of data from an uncompressed or compressed resource in a WIM
577 * file. Data is written into a buffer or fed into a callback function, as
578 * documented in read_resource_prefix().
583 * Just do a normal read, decompressing the data if necessary. @size and
584 * @offset are interpreted relative to the uncompressed contents of the
587 * WIMLIB_READ_RESOURCE_FLAG_RAW_FULL:
588 * Only valid when the resource is compressed: Read the raw contents of
589 * the compressed resource. If the resource is non-pipable, this includes
590 * the chunk table as well as the compressed chunks. If the resource is
591 * pipable, this includes the compressed chunks--- including the chunk
592 * headers--- and the chunk table. The stream header is still *not*
595 * In this mode, @offset is relative to the beginning of the raw contents
596 * of the compressed resource--- that is, the chunk table if the resource
597 * is non-pipable, or the header for the first compressed chunk if the
598 * resource is pipable. @size is the number of raw bytes to read, which
599 * must not overrun the end of the resource. For example, if @offset is 0,
600 * then @size can be at most the raw size of the compressed resource
601 * (@lte->resource_entry.size).
603 * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS:
604 * Only valid when the resource is compressed and is not being read from a
605 * pipe: Read the raw contents of the compressed chunks of the compressed
606 * resource. For pipable resources, this does *not* include the chunk
607 * headers. If a callback function is being used, it will be called once
608 * for each compressed chunk. The chunk table is excluded. Also, for
609 * pipable resources, the stream and chunk headers are excluded. In this
610 * mode, @size must be exactly the raw size of the compressed resource
611 * (@lte->resource_entry.size) and @offset must be 0.
613 * WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY:
614 * Only valid when the resource is being read from a pipe: Skip over the
615 * requested data rather than feed it to the callback function or write it
616 * into the buffer. No decompression is done.
617 * WIMLIB_READ_RESOURCE_FLAG_RAW_* may not be combined with this flag.
618 * @offset must be 0 and @size must be the uncompressed size of the
622 * WIMLIB_ERR_SUCCESS (0)
623 * WIMLIB_ERR_READ (errno set)
624 * WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to 0)
625 * WIMLIB_ERR_NOMEM (errno set to ENOMEM)
626 * WIMLIB_ERR_DECOMPRESSION (errno set to EINVAL)
627 * WIMLIB_ERR_INVALID_PIPABLE_WIM (errno set to EINVAL)
629 * or other error code returned by the @cb function.
// Read a byte range from a resource that lives in a WIM file and deliver it
// to @cb, or to the buffer at @ctx_or_buf when @cb is NULL.
// Three paths are taken:
//   - raw-full reads and uncompressed resources are read straight from the
//     file, through a temporary buffer of at most WIM_CHUNK_SIZE bytes when a
//     callback is used;
//   - compressed pipable resources on a non-seekable descriptor go through
//     read_pipable_resource();
//   - everything else goes through read_compressed_resource().
632 read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
633 u64 size, consume_data_callback_t cb,
634 void *ctx_or_buf, int flags, u64 offset)
636 struct filedes *in_fd;
639 /* Make sure the resource is actually located in a WIM file and is not
641 wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
643 /* Retrieve input file descriptor for the WIM file. */
644 in_fd = &lte->wim->in_fd;
646 /* Don't allow raw reads (either full or chunks) of uncompressed
648 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW) ||
649 resource_is_compressed(&lte->resource_entry));
651 /* Don't allow seek-only reads unless reading from a pipe; also don't
652 * allow combining SEEK_ONLY with either RAW flag. */
653 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY) ||
654 (!filedes_is_seekable(in_fd) &&
655 !(flags & WIMLIB_READ_RESOURCE_FLAG_RAW)));
657 DEBUG("Reading WIM resource: %"PRIu64" @ +%"PRIu64" "
658 "from %"PRIu64" @ +%"PRIu64" (readflags 0x%08x, resflags 0x%02x%s)",
660 lte->resource_entry.original_size, lte->resource_entry.offset,
661 flags, lte->resource_entry.flags,
662 (lte->is_pipable ? ", pipable" : ""));
664 if ((flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) ||
665 !resource_is_compressed(&lte->resource_entry))
667 /* Reading raw resource contents or reading uncompressed
669 wimlib_assert(offset + size <= lte->resource_entry.size);
// From here on @offset is an absolute position in the WIM file.
670 offset += lte->resource_entry.offset;
671 if (flags & WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY) {
672 if (lte->resource_entry.size != 0) {
674 ret = full_pread(in_fd, &dummy, 1,
675 offset + lte->resource_entry.size - 1);
680 /* Send data to callback function */
681 u8 buf[min(WIM_CHUNK_SIZE, size)];
683 size_t bytes_to_read = min(WIM_CHUNK_SIZE,
685 ret = full_pread(in_fd, buf, bytes_to_read,
689 ret = cb(buf, bytes_to_read, ctx_or_buf);
692 size -= bytes_to_read;
693 offset += bytes_to_read;
696 /* Send data directly to a buffer */
697 ret = full_pread(in_fd, ctx_or_buf, size, offset);
702 } else if (lte->is_pipable && !filedes_is_seekable(in_fd)) {
703 /* Reading compressed, pipable resource from pipe. */
704 ret = read_pipable_resource(lte, size, cb,
705 ctx_or_buf, flags, offset);
707 /* Reading compressed, possibly pipable resource from seekable
709 ret = read_compressed_resource(lte, size, cb,
710 ctx_or_buf, flags, offset);
715 ERROR_WITH_ERRNO("Error reading data from WIM");
// Convenience wrapper: read @size bytes starting at @offset from a WIM
// resource directly into @buf, with no callback and flags of 0.
722 read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
723 size_t size, u64 offset, void *buf)
725 return read_partial_wim_resource(lte, size, NULL, buf, 0, offset);
// Read the leading @size bytes of a WIM resource by forwarding every argument
// to read_partial_wim_resource() with an offset of 0.
729 read_wim_resource_prefix(const struct wim_lookup_table_entry *lte,
731 consume_data_callback_t cb,
735 return read_partial_wim_resource(lte, size, cb, ctx_or_buf, flags, 0);
// Read the leading @size bytes of the external file named by
// lte->file_on_disk.  The file is opened read-only; WIMLIB_ERR_OPEN is
// returned on the open-failure path.  With a callback, data is delivered in
// pieces of at most WIM_CHUNK_SIZE bytes; otherwise it is read in one call
// into the buffer at @ctx_or_buf.
// NOTE(review): the close of raw_fd is not visible in this listing -- confirm
// every exit path releases it.
741 read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
743 consume_data_callback_t cb,
747 const tchar *filename = lte->file_on_disk;
752 DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"",
753 size, lte->file_on_disk);
755 raw_fd = open(filename, O_RDONLY);
757 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", filename);
758 return WIMLIB_ERR_OPEN;
760 filedes_init(&fd, raw_fd);
762 /* Send data to callback function */
763 u8 buf[min(WIM_CHUNK_SIZE, size)];
764 size_t bytes_to_read;
766 bytes_to_read = min(WIM_CHUNK_SIZE, size);
767 ret = full_read(&fd, buf, bytes_to_read);
770 ret = cb(buf, bytes_to_read, ctx_or_buf);
773 size -= bytes_to_read;
776 /* Send data directly to a buffer */
777 ret = full_read(&fd, ctx_or_buf, size);
785 ERROR_WITH_ERRNO("Error reading \"%"TS"\"", filename);
790 #endif /* !__WIN32__ */
// Read the leading @size bytes of a resource held in memory at
// lte->attached_buffer.  With a callback the data is handed over in pieces of
// at most WIM_CHUNK_SIZE bytes; without one it is copied to @ctx_or_buf with
// memcpy().  The flags argument is not referenced in the visible lines.
793 read_buffer_prefix(const struct wim_lookup_table_entry *lte,
794 u64 size, consume_data_callback_t cb,
795 void *ctx_or_buf, int _ignored_flags)
797 const void *inbuf = lte->attached_buffer;
802 size_t chunk_size = min(WIM_CHUNK_SIZE, size);
803 ret = cb(inbuf, chunk_size, ctx_or_buf);
810 memcpy(ctx_or_buf, inbuf, size);
// Function type of the per-location readers stored in the dispatch table of
// read_resource_prefix(); it is invoked there as (lte, size, cb, ctx_or_buf,
// flags).
815 typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
817 consume_data_callback_t cb,
822 * read_resource_prefix()-
824 * Read the first @size bytes from a generic "resource", which may be located in
825 * the WIM (compressed or uncompressed), in an external file, or directly in an
828 * Feed the data either to a callback function (cb != NULL, passing it
829 * ctx_or_buf), or write it directly into a buffer (cb == NULL, ctx_or_buf
830 * specifies the buffer, which must have room for @size bytes).
832 * When using a callback function, it is called with chunks up to 32768 bytes in
833 * size until the resource is exhausted.
835 * If the resource is located in a WIM file, @flags can be set as documented in
836 * read_partial_wim_resource(). Otherwise @flags are ignored.
// Dispatch a prefix read to the handler registered for lte->resource_location.
// The location must index a non-NULL slot of the handler table; this is
// enforced with wimlib_assert().
// NOTE(review): RESOURCE_IN_FILE_ON_DISK is initialized twice below; the two
// entries are presumably in alternative preprocessor branches whose
// directives are not visible here -- confirm.
839 read_resource_prefix(const struct wim_lookup_table_entry *lte,
840 u64 size, consume_data_callback_t cb, void *ctx_or_buf,
843 static const read_resource_prefix_handler_t handlers[] = {
844 [RESOURCE_IN_WIM] = read_wim_resource_prefix,
846 [RESOURCE_IN_FILE_ON_DISK] = read_win32_file_prefix,
848 [RESOURCE_IN_FILE_ON_DISK] = read_file_on_disk_prefix,
850 [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix,
852 [RESOURCE_IN_STAGING_FILE] = read_file_on_disk_prefix,
855 [RESOURCE_IN_NTFS_VOLUME] = read_ntfs_file_prefix,
858 [RESOURCE_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix,
861 wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
862 && handlers[lte->resource_location] != NULL);
863 return handlers[lte->resource_location](lte, size, cb, ctx_or_buf, flags);
// Read the entire resource, wim_resource_size(lte) bytes, into @buf using
// read_resource_prefix() with no callback and flags of 0.
867 read_full_resource_into_buf(const struct wim_lookup_table_entry *lte,
870 return read_resource_prefix(lte, wim_resource_size(lte), NULL, buf, 0);
// Allocate a buffer of resource_entry.original_size bytes with MALLOC() and
// read the whole resource into it.  WIMLIB_ERR_NOMEM is returned when the
// size does not survive conversion to size_t, and on the allocation-failure
// path.
// NOTE(review): how the buffer is returned to the caller, and whether it is
// freed when the read fails, is on lines not visible here -- confirm.
874 read_full_resource_into_alloc_buf(const struct wim_lookup_table_entry *lte,
880 if ((size_t)lte->resource_entry.original_size !=
881 lte->resource_entry.original_size)
883 ERROR("Can't read %"PRIu64" byte resource into "
884 "memory", lte->resource_entry.original_size);
885 return WIMLIB_ERR_NOMEM;
888 buf = MALLOC(lte->resource_entry.original_size);
890 return WIMLIB_ERR_NOMEM;
892 ret = read_full_resource_into_buf(lte, buf);
// Read the data described by a bare resource entry of @wim into a newly
// allocated buffer.  A temporary lookup table entry is created, filled in
// from @res_entry and @wim, used for the read via
// read_full_resource_into_alloc_buf(), then freed.  WIMLIB_ERR_NOMEM is
// returned on the visible failure path after creating the temporary entry.
903 res_entry_to_data(const struct resource_entry *res_entry,
904 WIMStruct *wim, void **buf_ret)
907 struct wim_lookup_table_entry *lte;
909 lte = new_lookup_table_entry();
911 return WIMLIB_ERR_NOMEM;
// Copy into the resource_entry member of the temporary lookup table entry.
913 copy_resource_entry(&lte->resource_entry, res_entry);
915 lte->part_number = wim->hdr.part_number;
916 lte_init_wim(lte, wim);
918 ret = read_full_resource_into_alloc_buf(lte, buf_ret);
919 free_lookup_table_entry(lte);
// Members of the context handed to extract_chunk_sha1_wrapper(): the wrapped
// consumer callback and the argument forwarded to it.  The wrapper also uses
// a sha_ctx member whose declaration is not visible in this listing.
925 consume_data_callback_t extract_chunk;
926 void *extract_chunk_arg;
// Callback adapter: folds each chunk into the running SHA1 state held in the
// context, then passes the same chunk to the wrapped extract_chunk callback
// and returns that callback's result.
930 extract_chunk_sha1_wrapper(const void *chunk, size_t chunk_size,
933 struct extract_ctx *ctx = _ctx;
935 sha1_update(&ctx->sha_ctx, chunk, chunk_size);
936 return ctx->extract_chunk(chunk, chunk_size, ctx->extract_chunk_arg);
939 /* Extracts the first @size bytes of a WIM resource to somewhere. In the
940 * process, the SHA1 message digest of the resource is checked if the full
941 * resource is being extracted.
943 * @extract_chunk is a function that is called to extract each chunk of the
// Extract the leading @size bytes of a resource through @extract_chunk.
// When @size equals the full resource size, chunks are routed through
// extract_chunk_sha1_wrapper() so a SHA1 digest is accumulated and compared
// with lte->hash; a mismatch yields WIMLIB_ERR_INVALID_RESOURCE_HASH.
// For a shorter prefix the data is passed to @extract_chunk with no hashing.
946 extract_wim_resource(const struct wim_lookup_table_entry *lte,
948 consume_data_callback_t extract_chunk,
949 void *extract_chunk_arg)
952 if (size == wim_resource_size(lte)) {
954 struct extract_ctx ctx;
955 ctx.extract_chunk = extract_chunk;
956 ctx.extract_chunk_arg = extract_chunk_arg;
957 sha1_init(&ctx.sha_ctx);
958 ret = read_resource_prefix(lte, size,
959 extract_chunk_sha1_wrapper,
962 u8 hash[SHA1_HASH_SIZE];
963 sha1_final(hash, &ctx.sha_ctx);
964 if (!hashes_equal(hash, lte->hash)) {
965 if (wimlib_print_errors) {
966 ERROR("Invalid SHA1 message digest "
967 "on the following WIM resource:");
968 print_lookup_table_entry(lte, stderr);
969 if (lte->resource_location == RESOURCE_IN_WIM)
970 ERROR("The WIM file appears to be corrupt!");
972 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
977 ret = read_resource_prefix(lte, size, extract_chunk,
978 extract_chunk_arg, 0);
// Consumer callback that writes one chunk of @len bytes to the struct filedes
// passed as the context pointer, using full_write(); the failure path logs an
// error that includes errno.
984 extract_wim_chunk_to_fd(const void *buf, size_t len, void *_fd_p)
986 struct filedes *fd = _fd_p;
987 int ret = full_write(fd, buf, len);
989 ERROR_WITH_ERRNO("Error writing to file descriptor");
// Extract the leading @size bytes of a resource to @fd by calling
// extract_wim_resource() with extract_wim_chunk_to_fd() as the consumer.
994 extract_wim_resource_to_fd(const struct wim_lookup_table_entry *lte,
995 struct filedes *fd, u64 size)
997 return extract_wim_resource(lte, size, extract_wim_chunk_to_fd, fd);
// Consumer callback that folds one chunk into the SHA1 state passed as @ctx.
1002 sha1_chunk(const void *buf, size_t len, void *ctx)
1004 sha1_update(ctx, buf, len);
1008 /* Calculate the SHA1 message digest of a stream. */
// Hash the full contents of the resource by streaming it through
// sha1_chunk(), then store the final digest in lte->hash.
1010 sha1_resource(struct wim_lookup_table_entry *lte)
1015 sha1_init(&sha_ctx);
1016 ret = read_resource_prefix(lte, wim_resource_size(lte),
1017 sha1_chunk, &sha_ctx, 0);
1019 sha1_final(lte->hash, &sha_ctx);
1023 /* Translates a WIM resource entry from the on-disk format to an in-memory
// Decode an on-disk resource entry into its in-memory form.
// The size is assembled from seven bytes, least significant first; flags are
// copied as-is; offset and original_size are converted from little-endian.
// Offsets and original sizes with either of the top two bits set are masked
// down to 62 bits, with a warning.
1026 get_resource_entry(const struct resource_entry_disk *disk_entry,
1027 struct resource_entry *entry)
1029 /* Note: disk_entry may not be 8 byte aligned--- in that case, the
1030 * offset and original_size members will be unaligned. (This should be
1031 * okay since `struct resource_entry_disk' is declared as packed.) */
1033 /* Read the size and flags into a bitfield portably... */
1034 entry->size = (((u64)disk_entry->size[0] << 0) |
1035 ((u64)disk_entry->size[1] << 8) |
1036 ((u64)disk_entry->size[2] << 16) |
1037 ((u64)disk_entry->size[3] << 24) |
1038 ((u64)disk_entry->size[4] << 32) |
1039 ((u64)disk_entry->size[5] << 40) |
1040 ((u64)disk_entry->size[6] << 48));
1041 entry->flags = disk_entry->flags;
1042 entry->offset = le64_to_cpu(disk_entry->offset);
1043 entry->original_size = le64_to_cpu(disk_entry->original_size);
1045 /* offset and original_size are truncated to 62 bits to avoid possible
1046 * overflows, when converting to a signed 64-bit integer (off_t) or when
1047 * adding size or original_size. This is okay since no one would ever
1048 * actually have a WIM bigger than 4611686018427387903 bytes... */
1049 if (entry->offset & 0xc000000000000000ULL) {
1050 WARNING("Truncating offset in resource entry");
1051 entry->offset &= 0x3fffffffffffffffULL;
1053 if (entry->original_size & 0xc000000000000000ULL) {
1054 WARNING("Truncating original_size in resource entry");
1055 entry->original_size &= 0x3fffffffffffffffULL;
1059 /* Translates a WIM resource entry from an in-memory format into the on-disk
// Encode an in-memory resource entry into its on-disk form.
// The size is stored as seven bytes, least significant first; flags are
// copied as-is; offset and original_size are converted to little-endian.
1062 put_resource_entry(const struct resource_entry *entry,
1063 struct resource_entry_disk *disk_entry)
1065 /* Note: disk_entry may not be 8 byte aligned--- in that case, the
1066 * offset and original_size members will be unaligned. (This should be
1067 * okay since `struct resource_entry_disk' is declared as packed.) */
1068 u64 size = entry->size;
1070 disk_entry->size[0] = size >> 0;
1071 disk_entry->size[1] = size >> 8;
1072 disk_entry->size[2] = size >> 16;
1073 disk_entry->size[3] = size >> 24;
1074 disk_entry->size[4] = size >> 32;
1075 disk_entry->size[5] = size >> 40;
1076 disk_entry->size[6] = size >> 48;
1077 disk_entry->flags = entry->flags;
1078 disk_entry->offset = cpu_to_le64(entry->offset);
1079 disk_entry->original_size = cpu_to_le64(entry->original_size);