4 * Read uncompressed and compressed metadata and file resources from a WIM file.
8 * Copyright (C) 2012, 2013 Eric Biggers
10 * This file is part of wimlib, a library for working with WIM files.
12 * wimlib is free software; you can redistribute it and/or modify it under the
13 * terms of the GNU General Public License as published by the Free Software
14 * Foundation; either version 3 of the License, or (at your option) any later
17 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License along with
22 * wimlib; if not, see http://www.gnu.org/licenses/.
30 #include "wimlib/dentry.h"
31 #include "wimlib/endianness.h"
32 #include "wimlib/error.h"
33 #include "wimlib/file_io.h"
34 #include "wimlib/lookup_table.h"
35 #include "wimlib/resource.h"
36 #include "wimlib/sha1.h"
39 /* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */
40 # include "wimlib/win32.h"
44 /* for read_ntfs_file_prefix() */
45 # include "wimlib/ntfs_3g.h"
58 * Reads all or part of a compressed WIM resource.
60 * Returns zero on success, nonzero on failure.
63 read_compressed_resource(int in_fd,
64 u64 resource_compressed_size,
65 u64 resource_uncompressed_size,
70 consume_data_callback_t cb,
79 int (*decompress)(const void *, unsigned, void *, unsigned);
80 /* Set the appropriate decompress function. */
81 if (resource_ctype == WIMLIB_COMPRESSION_TYPE_LZX)
82 decompress = wimlib_lzx_decompress;
84 decompress = wimlib_xpress_decompress;
86 /* The structure of a compressed resource consists of a table of chunk
87 * offsets followed by the chunks themselves. Each chunk consists of
88 * compressed data, and there is one chunk for each WIM_CHUNK_SIZE =
89 * 32768 bytes of the uncompressed file, with the last chunk having any
92 * The chunk offsets are measured relative to the end of the chunk
93 * table. The first chunk is omitted from the table in the WIM file
94 * because its offset is implicitly given by the fact that it directly
95 * follows the chunk table and therefore must have an offset of 0.
98 /* Calculate how many chunks the resource consists of in its entirety.
100 u64 num_chunks = DIV_ROUND_UP(resource_uncompressed_size, WIM_CHUNK_SIZE);
102 /* As mentioned, the first chunk has no entry in the chunk table. */
103 u64 num_chunk_entries = num_chunks - 1;
106 /* The index of the chunk that the read starts at. */
107 u64 start_chunk = offset / WIM_CHUNK_SIZE;
108 /* The byte offset at which the read starts, within the start chunk. */
109 u64 start_chunk_offset = offset % WIM_CHUNK_SIZE;
111 /* The index of the chunk that contains the last byte of the read. */
112 u64 end_chunk = (offset + len - 1) / WIM_CHUNK_SIZE;
113 /* The byte offset of the last byte of the read, within the end chunk */
114 u64 end_chunk_offset = (offset + len - 1) % WIM_CHUNK_SIZE;
116 /* Number of chunks that are actually needed to read the requested part
118 u64 num_needed_chunks = end_chunk - start_chunk + 1;
120 /* If the end chunk is not the last chunk, an extra chunk entry is
121 * needed because we need to know the offset of the chunk after the last
122 * chunk read to figure out the size of the last read chunk. */
123 if (end_chunk != num_chunks - 1)
126 /* According to M$'s documentation, if the uncompressed size of
127 * the file is greater than 4 GB, the chunk entries are 8-byte
128 * integers. Otherwise, they are 4-byte integers. */
129 u64 chunk_entry_size = (resource_uncompressed_size >
130 (u64)1 << 32) ? 8 : 4;
132 /* Size of the full chunk table in the WIM file. */
133 u64 chunk_table_size = chunk_entry_size * num_chunk_entries;
135 /* Allocate the chunk table. It will only contain offsets for the
136 * chunks that are actually needed for this read. */
138 bool chunk_offsets_malloced;
139 if (num_needed_chunks < 1024) {
140 chunk_offsets = alloca(num_needed_chunks * sizeof(u64));
141 chunk_offsets_malloced = false;
143 chunk_offsets = malloc(num_needed_chunks * sizeof(u64));
144 if (!chunk_offsets) {
145 ERROR("Failed to allocate chunk table "
146 "with %"PRIu64" entries", num_needed_chunks);
147 return WIMLIB_ERR_NOMEM;
149 chunk_offsets_malloced = true;
152 /* Set the implicit offset of the first chunk if it is included in the
155 * Note: M$'s documentation includes a picture that shows the first
156 * chunk starting right after the chunk entry table, labeled as offset
157 * 0x10. However, in the actual file format, the offset is measured
158 * from the end of the chunk entry table, so the first chunk has an
160 if (start_chunk == 0)
161 chunk_offsets[0] = 0;
164 /* Read the needed chunk offsets from the table in the WIM file. */
166 /* Index, in the WIM file, of the first needed entry in the
168 u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
170 /* Number of entries we need to actually read from the chunk
171 * table (excludes the implicit first chunk). */
172 u64 num_needed_chunk_entries = (start_chunk == 0) ?
173 num_needed_chunks - 1 : num_needed_chunks;
175 /* Skip over unneeded chunk table entries. */
176 u64 file_offset_of_needed_chunk_entries = resource_offset +
177 start_table_idx * chunk_entry_size;
179 /* Allocate a buffer into which to read the raw chunk entries. */
181 bool chunk_tab_buf_malloced = false;
183 /* Number of bytes we need to read from the chunk table. */
184 size_t size = num_needed_chunk_entries * chunk_entry_size;
185 if ((u64)size != num_needed_chunk_entries * chunk_entry_size) {
186 ERROR("Compressed read request too large to fit into memory!");
187 ret = WIMLIB_ERR_NOMEM;
192 chunk_tab_buf = alloca(size);
194 chunk_tab_buf = malloc(size);
195 if (!chunk_tab_buf) {
196 ERROR("Failed to allocate chunk table buffer of "
197 "size %zu bytes", size);
198 ret = WIMLIB_ERR_NOMEM;
201 chunk_tab_buf_malloced = true;
204 if (full_pread(in_fd, chunk_tab_buf, size,
205 file_offset_of_needed_chunk_entries) != size)
208 /* Now fill in chunk_offsets from the entries we have read in
211 u64 *chunk_tab_p = chunk_offsets;
212 if (start_chunk == 0)
215 if (chunk_entry_size == 4) {
216 le32 *entries = (le32*)chunk_tab_buf;
217 while (num_needed_chunk_entries--)
218 *chunk_tab_p++ = le32_to_cpu(*entries++);
220 le64 *entries = (le64*)chunk_tab_buf;
221 while (num_needed_chunk_entries--)
222 *chunk_tab_p++ = le64_to_cpu(*entries++);
225 /* Done reading the chunk table now. Now calculate the file offset for
226 * the first byte of compressed data we need to read. */
228 u64 cur_read_offset = resource_offset + chunk_table_size + chunk_offsets[0];
230 /* Pointer to current position in the output buffer for uncompressed
231 * data. Alternatively, if using a callback function, we repeatedly
232 * fill a temporary buffer to feed data into the callback function. */
235 out_p = alloca(WIM_CHUNK_SIZE);
239 /* Buffer for compressed data. While most compressed chunks will have a
240 * size much less than WIM_CHUNK_SIZE, WIM_CHUNK_SIZE - 1 is the maximum
241 * size in the worst-case. This assumption is valid only if chunks that
242 * happen to compress to more than the uncompressed size (i.e. a
243 * sequence of random bytes) are always stored uncompressed. But this seems
244 * to be the case in M$'s WIM files, even though it is undocumented. */
245 void *compressed_buf = alloca(WIM_CHUNK_SIZE - 1);
247 /* Decompress all the chunks. */
248 for (u64 i = start_chunk; i <= end_chunk; i++) {
250 /* Calculate the sizes of the compressed chunk and of the
251 * uncompressed chunk. */
252 unsigned compressed_chunk_size;
253 unsigned uncompressed_chunk_size;
254 if (i != num_chunks - 1) {
255 /* All the chunks except the last one in the resource
256 * expand to WIM_CHUNK_SIZE uncompressed, and the amount
257 * of compressed data for the chunk is given by the
258 * difference of offsets in the chunk offset table. */
259 compressed_chunk_size = chunk_offsets[i + 1 - start_chunk] -
260 chunk_offsets[i - start_chunk];
261 uncompressed_chunk_size = WIM_CHUNK_SIZE;
263 /* The last compressed chunk consists of the remaining
264 * bytes in the file resource, and the last uncompressed
265 * chunk has size equal to however many bytes are left-
266 * that is, the remainder of the uncompressed size when
267 * divided by WIM_CHUNK_SIZE.
269 * Note that the resource_compressed_size includes the
270 * chunk table, so the size of it must be subtracted. */
271 compressed_chunk_size = resource_compressed_size -
273 chunk_offsets[i - start_chunk];
275 uncompressed_chunk_size = resource_uncompressed_size %
278 /* If the remainder is 0, the last chunk actually
279 * uncompresses to a full WIM_CHUNK_SIZE bytes. */
280 if (uncompressed_chunk_size == 0)
281 uncompressed_chunk_size = WIM_CHUNK_SIZE;
284 /* Figure out how much of this chunk we actually need to read */
286 if (i == start_chunk)
287 start_offset = start_chunk_offset;
292 end_offset = end_chunk_offset;
294 end_offset = WIM_CHUNK_SIZE - 1;
296 unsigned partial_chunk_size = end_offset + 1 - start_offset;
297 bool is_partial_chunk = (partial_chunk_size != uncompressed_chunk_size);
299 /* This is undocumented, but chunks can be uncompressed. This
300 * appears to always be the case when the compressed chunk size
301 * is equal to the uncompressed chunk size. */
302 if (compressed_chunk_size == uncompressed_chunk_size) {
303 /* Uncompressed chunk */
304 if (full_pread(in_fd,
305 cb ? out_p + start_offset : out_p,
307 cur_read_offset + start_offset) != partial_chunk_size)
312 /* Compressed chunk */
314 /* Read the compressed data into compressed_buf. */
315 if (full_pread(in_fd,
317 compressed_chunk_size,
318 cur_read_offset) != compressed_chunk_size)
323 /* For partial chunks and when writing directly to a
324 * buffer, we must buffer the uncompressed data because
325 * we don't need all of it. */
326 if (is_partial_chunk && !cb) {
327 u8 uncompressed_buf[uncompressed_chunk_size];
329 ret = decompress(compressed_buf,
330 compressed_chunk_size,
332 uncompressed_chunk_size);
334 ret = WIMLIB_ERR_DECOMPRESSION;
337 memcpy(out_p, uncompressed_buf + start_offset,
340 ret = decompress(compressed_buf,
341 compressed_chunk_size,
343 uncompressed_chunk_size);
345 ret = WIMLIB_ERR_DECOMPRESSION;
351 /* Feed the data to the callback function */
352 ret = cb(out_p + start_offset,
353 partial_chunk_size, ctx_or_buf);
357 /* No callback function provided; we are writing
358 * directly to a buffer. Advance the pointer into this
359 * buffer by the number of uncompressed bytes that were
361 out_p += partial_chunk_size;
363 cur_read_offset += compressed_chunk_size;
368 if (chunk_offsets_malloced)
370 if (chunk_tab_buf_malloced)
375 ERROR_WITH_ERRNO("Error reading compressed file resource");
376 ret = WIMLIB_ERR_READ;
380 /* Translates a WIM resource entry from the on-disk format to an in-memory
383 get_resource_entry(const struct resource_entry_disk *disk_entry,
384 struct resource_entry *entry)
386 /* Note: disk_entry may not be 8 byte aligned--- in that case, the
387 * offset and original_size members will be unaligned. (This should be
388 * okay since `struct resource_entry_disk' is declared as packed.) */
390 /* Read the size and flags into a bitfield portably... */
391 entry->size = (((u64)disk_entry->size[0] << 0) |
392 ((u64)disk_entry->size[1] << 8) |
393 ((u64)disk_entry->size[2] << 16) |
394 ((u64)disk_entry->size[3] << 24) |
395 ((u64)disk_entry->size[4] << 32) |
396 ((u64)disk_entry->size[5] << 40) |
397 ((u64)disk_entry->size[6] << 48));
398 entry->flags = disk_entry->flags;
399 entry->offset = le64_to_cpu(disk_entry->offset);
400 entry->original_size = le64_to_cpu(disk_entry->original_size);
402 /* offset and original_size are truncated to 62 bits to avoid possible
403 * overflows, when converting to a signed 64-bit integer (off_t) or when
404 * adding size or original_size. This is okay since no one would ever
405 * actually have a WIM bigger than 4611686018427387903 bytes... */
406 if (entry->offset & 0xc000000000000000ULL) {
407 WARNING("Truncating offset in resource entry");
408 entry->offset &= 0x3fffffffffffffffULL;
410 if (entry->original_size & 0xc000000000000000ULL) {
411 WARNING("Truncating original_size in resource entry");
412 entry->original_size &= 0x3fffffffffffffffULL;
416 /* Translates a WIM resource entry from an in-memory format into the on-disk
419 put_resource_entry(const struct resource_entry *entry,
420 struct resource_entry_disk *disk_entry)
422 /* Note: disk_entry may not be 8 byte aligned--- in that case, the
423 * offset and original_size members will be unaligned. (This should be
424 * okay since `struct resource_entry_disk' is declared as packed.) */
425 u64 size = entry->size;
427 disk_entry->size[0] = size >> 0;
428 disk_entry->size[1] = size >> 8;
429 disk_entry->size[2] = size >> 16;
430 disk_entry->size[3] = size >> 24;
431 disk_entry->size[4] = size >> 32;
432 disk_entry->size[5] = size >> 40;
433 disk_entry->size[6] = size >> 48;
434 disk_entry->flags = entry->flags;
435 disk_entry->offset = cpu_to_le64(entry->offset);
436 disk_entry->original_size = cpu_to_le64(entry->original_size);
440 read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
442 consume_data_callback_t cb,
451 wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
456 if (lte->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED &&
457 !(flags & WIMLIB_RESOURCE_FLAG_RAW))
459 ret = read_compressed_resource(in_fd,
460 lte->resource_entry.size,
461 lte->resource_entry.original_size,
462 lte->resource_entry.offset,
463 wimlib_get_compression_type(wim),
469 offset += lte->resource_entry.offset;
471 /* Send data to callback function */
472 u8 buf[min(WIM_CHUNK_SIZE, size)];
474 size_t bytes_to_read = min(WIM_CHUNK_SIZE, size);
475 size_t bytes_read = full_pread(in_fd, buf,
476 bytes_to_read, offset);
477 if (bytes_read != bytes_to_read)
479 ret = cb(buf, bytes_read, ctx_or_buf);
483 offset += bytes_read;
486 /* Send data directly to a buffer */
487 if (full_pread(in_fd, ctx_or_buf, size, offset) != size)
494 ERROR_WITH_ERRNO("Error reading data from WIM");
495 ret = WIMLIB_ERR_READ;
506 read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
507 size_t size, u64 offset, void *buf)
509 return read_partial_wim_resource(lte, size, NULL, buf, 0, offset);
513 read_wim_resource_prefix(const struct wim_lookup_table_entry *lte,
515 consume_data_callback_t cb,
519 return read_partial_wim_resource(lte, size, cb, ctx_or_buf, flags, 0);
525 read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
527 consume_data_callback_t cb,
531 const tchar *filename = lte->file_on_disk;
536 fd = open(filename, O_RDONLY);
538 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", filename);
539 return WIMLIB_ERR_OPEN;
542 /* Send data to callback function */
543 u8 buf[min(WIM_CHUNK_SIZE, size)];
544 size_t bytes_to_read;
546 bytes_to_read = min(WIM_CHUNK_SIZE, size);
547 bytes_read = full_read(fd, buf, bytes_to_read);
548 if (bytes_read != bytes_to_read)
550 ret = cb(buf, bytes_read, ctx_or_buf);
556 /* Send data directly to a buffer */
557 bytes_read = full_read(fd, ctx_or_buf, size);
558 if (bytes_read != size)
564 ERROR_WITH_ERRNO("Error reading \"%"TS"\"", filename);
565 ret = WIMLIB_ERR_READ;
570 #endif /* !__WIN32__ */
573 read_buffer_prefix(const struct wim_lookup_table_entry *lte,
574 u64 size, consume_data_callback_t cb,
575 void *ctx_or_buf, int _ignored_flags)
577 const void *inbuf = lte->attached_buffer;
582 size_t chunk_size = min(WIM_CHUNK_SIZE, size);
583 ret = cb(inbuf, chunk_size, ctx_or_buf);
590 memcpy(ctx_or_buf, inbuf, size);
595 typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
597 consume_data_callback_t cb,
602 * Read the first @size bytes from a generic "resource", which may be located in
603 * the WIM (compressed or uncompressed), in an external file, or directly in an
606 * Feed the data either to a callback function (cb != NULL, passing it
607 * ctx_or_buf), or write it directly into a buffer (cb == NULL, ctx_or_buf
608 * specifies the buffer, which must have room for @size bytes).
610 * When using a callback function, it is called with chunks up to 32768 bytes in
611 * size until the resource is exhausted.
613 * If the resource is located in a WIM file, @flags can be:
614 * * WIMLIB_RESOURCE_FLAG_RAW if the raw compressed data is to be supplied
615 * instead of the uncompressed data.
616 * Otherwise, the @flags are ignored.
619 read_resource_prefix(const struct wim_lookup_table_entry *lte,
620 u64 size, consume_data_callback_t cb, void *ctx_or_buf,
623 static const read_resource_prefix_handler_t handlers[] = {
624 [RESOURCE_IN_WIM] = read_wim_resource_prefix,
626 [RESOURCE_IN_FILE_ON_DISK] = read_file_on_disk_prefix,
628 [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix,
630 [RESOURCE_IN_STAGING_FILE] = read_file_on_disk_prefix,
633 [RESOURCE_IN_NTFS_VOLUME] = read_ntfs_file_prefix,
636 [RESOURCE_WIN32] = read_win32_file_prefix,
637 [RESOURCE_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix,
640 wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
641 && handlers[lte->resource_location] != NULL);
642 return handlers[lte->resource_location](lte, size, cb, ctx_or_buf, flags);
646 read_full_resource_into_buf(const struct wim_lookup_table_entry *lte,
649 return read_resource_prefix(lte, wim_resource_size(lte), NULL, buf, 0);
654 consume_data_callback_t extract_chunk;
655 void *extract_chunk_arg;
659 extract_chunk_sha1_wrapper(const void *chunk, size_t chunk_size,
662 struct extract_ctx *ctx = _ctx;
664 sha1_update(&ctx->sha_ctx, chunk, chunk_size);
665 return ctx->extract_chunk(chunk, chunk_size, ctx->extract_chunk_arg);
668 /* Extracts the first @size bytes of a WIM resource to somewhere. In the
669 * process, the SHA1 message digest of the resource is checked if the full
670 * resource is being extracted.
672 * @extract_chunk is a function that is called to extract each chunk of the
675 extract_wim_resource(const struct wim_lookup_table_entry *lte,
677 consume_data_callback_t extract_chunk,
678 void *extract_chunk_arg)
681 if (size == wim_resource_size(lte)) {
683 struct extract_ctx ctx;
684 ctx.extract_chunk = extract_chunk;
685 ctx.extract_chunk_arg = extract_chunk_arg;
686 sha1_init(&ctx.sha_ctx);
687 ret = read_resource_prefix(lte, size,
688 extract_chunk_sha1_wrapper,
691 u8 hash[SHA1_HASH_SIZE];
692 sha1_final(hash, &ctx.sha_ctx);
693 if (!hashes_equal(hash, lte->hash)) {
694 if (wimlib_print_errors) {
695 ERROR("Invalid SHA1 message digest "
696 "on the following WIM resource:");
697 print_lookup_table_entry(lte, stderr);
698 if (lte->resource_location == RESOURCE_IN_WIM)
699 ERROR("The WIM file appears to be corrupt!");
701 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
706 ret = read_resource_prefix(lte, size, extract_chunk,
707 extract_chunk_arg, 0);
713 extract_wim_chunk_to_fd(const void *buf, size_t len, void *_fd_p)
715 int fd = *(int*)_fd_p;
716 ssize_t ret = full_write(fd, buf, len);
718 ERROR_WITH_ERRNO("Error writing to file descriptor");
719 return WIMLIB_ERR_WRITE;
726 extract_wim_resource_to_fd(const struct wim_lookup_table_entry *lte,
729 return extract_wim_resource(lte, size, extract_wim_chunk_to_fd, &fd);
734 sha1_chunk(const void *buf, size_t len, void *ctx)
736 sha1_update(ctx, buf, len);
740 /* Calculate the SHA1 message digest of a stream. */
742 sha1_resource(struct wim_lookup_table_entry *lte)
748 ret = read_resource_prefix(lte, wim_resource_size(lte),
749 sha1_chunk, &sha_ctx, 0);
751 sha1_final(lte->hash, &sha_ctx);
756 * Copies the file resource specified by the lookup table entry @lte from the
757 * input WIM to the output WIM that has its file descriptor given by
758 * ((WIMStruct*)wim)->out_fd.
760 * The output_resource_entry, out_refcnt, and part_number fields of @lte are
763 * (This function is confusing and should be refactored somehow.)
766 copy_resource(struct wim_lookup_table_entry *lte, void *wim)
771 ret = write_wim_resource(lte, w->out_fd,
772 wim_resource_compression_type(lte),
773 &lte->output_resource_entry, 0);
775 lte->out_refcnt = lte->refcnt;
776 lte->part_number = w->hdr.part_number;