4 * Read uncompressed and compressed metadata and file resources from a WIM file.
8 * Copyright (C) 2012, 2013 Eric Biggers
10 * This file is part of wimlib, a library for working with WIM files.
12 * wimlib is free software; you can redistribute it and/or modify it under the
13 * terms of the GNU General Public License as published by the Free Software
14 * Foundation; either version 3 of the License, or (at your option) any later
17 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License along with
22 * wimlib; if not, see http://www.gnu.org/licenses/.
25 #include "wimlib_internal.h"
27 #include "lookup_table.h"
28 #include "buffer_io.h"
41 /* Write @n bytes from @buf to the file descriptor @fd, retrying on interrupt
42 * and on short writes.
44 * Returns a short count and sets errno on failure. */
46 full_write(int fd, const void *buf, size_t n)
53 ret = write(fd, p, n);
66 /* Read @n bytes from the file descriptor @fd to the buffer @buf, retrying on
67 * interrupt and on short reads.
69 * Returns a short count and sets errno on failure. */
/* NOTE(review): only part of this function is visible in this view (the lines
 * between the read() call and the subtraction below are missing).  Confirm
 * that a read() return of 0 (end of file) leaves the loop; if it does not,
 * bytes_remaining never decreases and the loop never terminates.  Also confirm
 * that @buf is advanced after each partial read. */
71 full_read(int fd, void *buf, size_t n)
73 size_t bytes_remaining = n;
/* Keep issuing reads until the whole request has been satisfied. */
74 while (bytes_remaining) {
75 ssize_t bytes_read = read(fd, buf, bytes_remaining);
81 bytes_remaining -= bytes_read;
/* Number of bytes actually read: equal to @n only when bytes_remaining
 * reached zero. */
84 return n - bytes_remaining;
88 * Reads all or part of a compressed WIM resource.
90 * Returns zero on success, nonzero on failure.
93 read_compressed_resource(FILE *fp, u64 resource_compressed_size,
94 u64 resource_uncompressed_size,
95 u64 resource_offset, int resource_ctype,
97 consume_data_callback_t cb,
106 int (*decompress)(const void *, unsigned, void *, unsigned);
107 /* Set the appropriate decompress function. */
108 if (resource_ctype == WIMLIB_COMPRESSION_TYPE_LZX)
109 decompress = wimlib_lzx_decompress;
111 decompress = wimlib_xpress_decompress;
113 /* The structure of a compressed resource consists of a table of chunk
114 * offsets followed by the chunks themselves. Each chunk consists of
115 * compressed data, and there is one chunk for each WIM_CHUNK_SIZE =
116 * 32768 bytes of the uncompressed file, with the last chunk having any
119 * The chunk offsets are measured relative to the end of the chunk
120 * table. The first chunk is omitted from the table in the WIM file
121 * because its offset is implicitly given by the fact that it directly
122 * follows the chunk table and therefore must have an offset of 0.
125 /* Calculate how many chunks the resource consists of in its entirety. */
126 u64 num_chunks = (resource_uncompressed_size + WIM_CHUNK_SIZE - 1) /
128 /* As mentioned, the first chunk has no entry in the chunk table. */
129 u64 num_chunk_entries = num_chunks - 1;
132 /* The index of the chunk that the read starts at. */
133 u64 start_chunk = offset / WIM_CHUNK_SIZE;
134 /* The byte offset at which the read starts, within the start chunk. */
135 u64 start_chunk_offset = offset % WIM_CHUNK_SIZE;
137 /* The index of the chunk that contains the last byte of the read. */
138 u64 end_chunk = (offset + len - 1) / WIM_CHUNK_SIZE;
139 /* The byte offset of the last byte of the read, within the end chunk */
140 u64 end_chunk_offset = (offset + len - 1) % WIM_CHUNK_SIZE;
142 /* Number of chunks that are actually needed to read the requested part
144 u64 num_needed_chunks = end_chunk - start_chunk + 1;
146 /* If the end chunk is not the last chunk, an extra chunk entry is
147 * needed because we need to know the offset of the chunk after the last
148 * chunk read to figure out the size of the last read chunk. */
149 if (end_chunk != num_chunks - 1)
152 /* Declare the chunk table. It will only contain offsets for the chunks
153 * that are actually needed for this read. */
154 u64 chunk_offsets[num_needed_chunks];
156 /* Set the implicit offset of the first chunk if it is included in the
159 * Note: M$'s documentation includes a picture that shows the first
160 * chunk starting right after the chunk entry table, labeled as offset
161 * 0x10. However, in the actual file format, the offset is measured
162 * from the end of the chunk entry table, so the first chunk has an
164 if (start_chunk == 0)
165 chunk_offsets[0] = 0;
167 /* According to M$'s documentation, if the uncompressed size of
168 * the file is greater than 4 GB, the chunk entries are 8-byte
169 * integers. Otherwise, they are 4-byte integers. */
170 u64 chunk_entry_size = (resource_uncompressed_size >= (u64)1 << 32) ?
173 /* Size of the full chunk table in the WIM file. */
174 u64 chunk_table_size = chunk_entry_size * num_chunk_entries;
176 /* Read the needed chunk offsets from the table in the WIM file. */
178 /* Index, in the WIM file, of the first needed entry in the
180 u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
182 /* Number of entries we need to actually read from the chunk
183 * table (excludes the implicit first chunk). */
184 u64 num_needed_chunk_entries = (start_chunk == 0) ?
185 num_needed_chunks - 1 : num_needed_chunks;
187 /* Skip over unneeded chunk table entries. */
188 u64 file_offset_of_needed_chunk_entries = resource_offset +
189 start_table_idx * chunk_entry_size;
190 if (fseeko(fp, file_offset_of_needed_chunk_entries, SEEK_SET))
193 /* Number of bytes we need to read from the chunk table. */
194 size_t size = num_needed_chunk_entries * chunk_entry_size;
197 u8 chunk_tab_buf[size];
199 if (fread(chunk_tab_buf, 1, size, fp) != size)
202 /* Now fill in chunk_offsets from the entries we have read in
205 u64 *chunk_tab_p = chunk_offsets;
206 if (start_chunk == 0)
209 if (chunk_entry_size == 4) {
210 u32 *entries = (u32*)chunk_tab_buf;
211 while (num_needed_chunk_entries--)
212 *chunk_tab_p++ = le32_to_cpu(*entries++);
214 u64 *entries = (u64*)chunk_tab_buf;
215 while (num_needed_chunk_entries--)
216 *chunk_tab_p++ = le64_to_cpu(*entries++);
220 /* Done with the chunk table now. We must now seek to the first chunk
221 * that is needed for the read. */
223 u64 file_offset_of_first_needed_chunk = resource_offset +
224 chunk_table_size + chunk_offsets[0];
225 if (fseeko(fp, file_offset_of_first_needed_chunk, SEEK_SET))
228 /* Pointer to current position in the output buffer for uncompressed
232 out_p = alloca(32768);
236 /* Buffer for compressed data. While most compressed chunks will have a
237 * size much less than WIM_CHUNK_SIZE, WIM_CHUNK_SIZE - 1 is the maximum
238 * size in the worst-case. This assumption is valid only if chunks that
239 * happen to compress to more than the uncompressed size (i.e. a
240 * sequence of random bytes) are always stored uncompressed. But this seems
241 * to be the case in M$'s WIM files, even though it is undocumented. */
242 void *compressed_buf = alloca(WIM_CHUNK_SIZE - 1);
244 /* Decompress all the chunks. */
245 for (u64 i = start_chunk; i <= end_chunk; i++) {
247 /* Calculate the sizes of the compressed chunk and of the
248 * uncompressed chunk. */
249 unsigned compressed_chunk_size;
250 unsigned uncompressed_chunk_size;
251 if (i != num_chunks - 1) {
252 /* All the chunks except the last one in the resource
253 * expand to WIM_CHUNK_SIZE uncompressed, and the amount
254 * of compressed data for the chunk is given by the
255 * difference of offsets in the chunk offset table. */
256 compressed_chunk_size = chunk_offsets[i + 1 - start_chunk] -
257 chunk_offsets[i - start_chunk];
258 uncompressed_chunk_size = WIM_CHUNK_SIZE;
260 /* The last compressed chunk consists of the remaining
261 * bytes in the file resource, and the last uncompressed
262 * chunk has size equal to however many bytes are left-
263 * that is, the remainder of the uncompressed size when
264 * divided by WIM_CHUNK_SIZE.
266 * Note that the resource_compressed_size includes the
267 * chunk table, so the size of it must be subtracted. */
268 compressed_chunk_size = resource_compressed_size -
270 chunk_offsets[i - start_chunk];
272 uncompressed_chunk_size = resource_uncompressed_size %
275 /* If the remainder is 0, the last chunk actually
276 * uncompresses to a full WIM_CHUNK_SIZE bytes. */
277 if (uncompressed_chunk_size == 0)
278 uncompressed_chunk_size = WIM_CHUNK_SIZE;
281 /* Figure out how much of this chunk we actually need to read */
283 if (i == start_chunk)
284 start_offset = start_chunk_offset;
289 end_offset = end_chunk_offset;
291 end_offset = WIM_CHUNK_SIZE - 1;
293 unsigned partial_chunk_size = end_offset + 1 - start_offset;
294 bool is_partial_chunk = (partial_chunk_size != uncompressed_chunk_size);
296 /* This is undocumented, but chunks can be uncompressed. This
297 * appears to always be the case when the compressed chunk size
298 * is equal to the uncompressed chunk size. */
299 if (compressed_chunk_size == uncompressed_chunk_size) {
300 /* Uncompressed chunk */
302 if (start_offset != 0)
303 if (fseeko(fp, start_offset, SEEK_CUR))
305 if (fread(out_p, 1, partial_chunk_size, fp) != partial_chunk_size)
308 /* Compressed chunk */
310 /* Read the compressed data into compressed_buf. */
311 if (fread(compressed_buf, 1, compressed_chunk_size,
312 fp) != compressed_chunk_size)
315 /* For partial chunks and when writing directly to a
316 * buffer, we must buffer the uncompressed data because
317 * we don't need all of it. */
318 if (is_partial_chunk && !cb) {
319 u8 uncompressed_buf[uncompressed_chunk_size];
321 ret = decompress(compressed_buf,
322 compressed_chunk_size,
324 uncompressed_chunk_size);
326 ret = WIMLIB_ERR_DECOMPRESSION;
329 memcpy(out_p, uncompressed_buf + start_offset,
332 ret = decompress(compressed_buf,
333 compressed_chunk_size,
335 uncompressed_chunk_size);
337 ret = WIMLIB_ERR_DECOMPRESSION;
343 /* Feed the data to the callback function */
344 ret = cb(out_p, partial_chunk_size, ctx_or_buf);
348 /* No callback function provided; we are writing
349 * directly to a buffer. Advance the pointer into this
350 * buffer by the number of uncompressed bytes that were
352 out_p += partial_chunk_size;
362 ERROR("Unexpected EOF in compressed file resource");
364 ERROR_WITH_ERRNO("Error reading compressed file resource");
365 ret = WIMLIB_ERR_READ;
370 * Reads uncompressed data from an open file stream.
373 read_uncompressed_resource(FILE *fp, u64 offset, u64 len, void *contents_ret)
375 if (fseeko(fp, offset, SEEK_SET) != 0) {
376 ERROR("Failed to seek to byte %"PRIu64" of input file "
377 "to read uncompressed resource (len = %"PRIu64")",
379 return WIMLIB_ERR_READ;
381 if (fread(contents_ret, 1, len, fp) != len) {
383 ERROR("Unexpected EOF in uncompressed file resource");
385 ERROR("Failed to read %"PRIu64" bytes from "
386 "uncompressed resource at offset %"PRIu64,
389 return WIMLIB_ERR_READ;
394 /* Reads the contents of a struct resource_entry, as represented in the on-disk
395 * format, from the memory pointed to by @p, and fills in the fields of @entry.
396 * A pointer to the byte after the memory read at @p is returned. */
398 get_resource_entry(const void *p, struct resource_entry *entry)
403 p = get_u56(p, &size);
404 p = get_u8(p, &flags);
406 entry->flags = flags;
408 /* offset and original_size are truncated to 62 bits to avoid possible
409 * overflows, when converting to a signed 64-bit integer (off_t) or when
410 * adding size or original_size. This is okay since no one would ever
411 * actually have a WIM bigger than 4611686018427387903 bytes... */
412 p = get_u64(p, &entry->offset);
413 if (entry->offset & 0xc000000000000000ULL) {
414 WARNING("Truncating offset in resource entry");
415 entry->offset &= 0x3fffffffffffffffULL;
417 p = get_u64(p, &entry->original_size);
418 if (entry->original_size & 0xc000000000000000ULL) {
419 WARNING("Truncating original_size in resource entry");
420 entry->original_size &= 0x3fffffffffffffffULL;
425 /* Copies the struct resource_entry @entry to the memory pointed to by @p in the
426 * on-disk format. A pointer to the byte after the memory written at @p is
429 put_resource_entry(void *p, const struct resource_entry *entry)
431 p = put_u56(p, entry->size);
432 p = put_u8(p, entry->flags);
433 p = put_u64(p, entry->offset);
434 p = put_u64(p, entry->original_size);
439 wim_get_fp(WIMStruct *w)
442 pthread_mutex_lock(&w->fp_tab_mutex);
445 wimlib_assert(w->filename != NULL);
447 for (size_t i = 0; i < w->num_allocated_fps; i++) {
454 DEBUG("Opening extra file descriptor to `%"TS"'", w->filename);
455 fp = tfopen(w->filename, T("rb"));
457 ERROR_WITH_ERRNO("Failed to open `%"TS"'", w->filename);
459 pthread_mutex_unlock(&w->fp_tab_mutex);
460 #else /* WITH_FUSE */
462 #endif /* !WITH_FUSE */
467 wim_release_fp(WIMStruct *w, FILE *fp)
473 pthread_mutex_lock(&w->fp_tab_mutex);
475 for (size_t i = 0; i < w->num_allocated_fps; i++) {
476 if (w->fp_tab[i] == NULL) {
482 fp_tab = REALLOC(w->fp_tab, sizeof(FILE*) * (w->num_allocated_fps + 4));
484 ret = WIMLIB_ERR_NOMEM;
489 memset(&w->fp_tab[w->num_allocated_fps], 0, 4 * sizeof(FILE*));
490 w->fp_tab[w->num_allocated_fps] = fp;
491 w->num_allocated_fps += 4;
493 pthread_mutex_unlock(&w->fp_tab_mutex);
494 #endif /* WITH_FUSE */
/* Reads part of the resource described by @lte out of its WIM file, either
 * feeding the data to @cb or storing it through @ctx_or_buf.  Several lines of
 * this function (including most of the parameter list) are not visible in this
 * view, so the notes below cover only what the visible lines show. */
499 read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
501 consume_data_callback_t cb,
/* The entry must refer to data stored in a WIM, and the requested range must
 * lie inside the uncompressed resource.
 * NOTE(review): offset + size is evaluated without an overflow check; confirm
 * the operand types are wide enough that the sum cannot wrap. */
510 wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
511 wimlib_assert(offset + size <= lte->resource_entry.original_size);
/* With the MULTITHREADED flag the stream is obtained from wim_get_fp() and
 * handed back through wim_release_fp() at the end of this function. */
515 if (flags & WIMLIB_RESOURCE_FLAG_MULTITHREADED) {
516 wim_fp = wim_get_fp(wim);
/* Otherwise the stream stored in the WIM structure itself is used. */
522 wim_fp = lte->wim->fp;
525 wimlib_assert(wim_fp != NULL);
/* Compressed resources go through read_compressed_resource() unless the caller
 * passed the RAW flag. */
527 if (lte->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED &&
528 !(flags & WIMLIB_RESOURCE_FLAG_RAW))
530 ret = read_compressed_resource(wim_fp,
531 lte->resource_entry.size,
532 lte->resource_entry.original_size,
533 lte->resource_entry.offset,
534 wimlib_get_compression_type(wim),
/* Remaining case: seek to the data and read it as stored. */
540 if (fseeko(wim_fp, offset, SEEK_SET)) {
541 ERROR_WITH_ERRNO("Failed to seek to offset %"PRIu64
543 ret = WIMLIB_ERR_READ;
547 /* Send data to callback function */
/* NOTE(review): when size is 0 the array bound below evaluates to 0; a
 * variable-length array must have a positive size.  Confirm that a zero-length
 * request cannot reach this declaration. */
548 u8 buf[min(WIM_CHUNK_SIZE, size)];
/* Each pass reads at most WIM_CHUNK_SIZE bytes and forwards them to @cb. */
550 size_t bytes_to_read = min(WIM_CHUNK_SIZE, size);
551 size_t bytes_read = fread(buf, 1, bytes_to_read, wim_fp);
553 if (bytes_read != bytes_to_read)
555 ret = cb(buf, bytes_read, ctx_or_buf);
561 /* Send data directly to a buffer */
562 if (fread(ctx_or_buf, 1, size, wim_fp) != size)
569 ERROR_WITH_ERRNO("Error reading data from WIM");
570 ret = WIMLIB_ERR_READ;
/* NOTE(review): OR-ing two nonzero status codes produces a value that may
 * match neither of them.  Confirm callers only test for nonzero, or keep the
 * first error instead of merging the two. */
572 if (flags & WIMLIB_RESOURCE_FLAG_MULTITHREADED)
573 ret |= wim_release_fp(wim, wim_fp);
584 read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
585 size_t size, u64 offset, void *buf,
588 return read_partial_wim_resource(lte, size, NULL, buf,
589 threadsafe ? WIMLIB_RESOURCE_FLAG_MULTITHREADED : 0,
594 read_wim_resource_prefix(const struct wim_lookup_table_entry *lte,
596 consume_data_callback_t cb,
600 return read_partial_wim_resource(lte, size, cb, ctx_or_buf, flags, 0);
605 read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
607 consume_data_callback_t cb,
611 const tchar *filename = lte->file_on_disk;
616 fd = open(filename, O_RDONLY);
618 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", filename);
619 return WIMLIB_ERR_OPEN;
622 /* Send data to callback function */
623 u8 buf[min(WIM_CHUNK_SIZE, size)];
624 size_t bytes_to_read;
626 bytes_to_read = min(WIM_CHUNK_SIZE, size);
627 bytes_read = full_read(fd, buf, bytes_to_read);
628 if (bytes_read != bytes_to_read)
630 ret = cb(buf, bytes_read, ctx_or_buf);
636 /* Send data directly to a buffer */
637 bytes_read = full_read(fd, ctx_or_buf, size);
638 if (bytes_read != size)
644 ERROR_WITH_ERRNO("Error reading \"%"TS"\"", filename);
645 ret = WIMLIB_ERR_READ;
/* Serves the first @size bytes of the in-memory buffer attached to @lte.  The
 * flags argument is not used on any line visible here. */
652 read_buffer_prefix(const struct wim_lookup_table_entry *lte,
653 u64 size, consume_data_callback_t cb,
654 void *ctx_or_buf, int _ignored_flags)
656 const void *inbuf = lte->attached_buffer;
/* Callback mode: the whole prefix is delivered in a single call and the
 * callback's status is returned unchanged.  (The condition selecting this path
 * is not visible in this view -- presumably a test of @cb; verify.) */
658 return cb(inbuf, size, ctx_or_buf);
/* Buffer mode: @ctx_or_buf is the destination of a plain copy of @size bytes. */
660 memcpy(ctx_or_buf, inbuf, size);
665 typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
667 consume_data_callback_t cb,
672 read_resource_prefix(const struct wim_lookup_table_entry *lte,
673 u64 size, consume_data_callback_t cb, void *ctx_or_buf,
676 static const read_resource_prefix_handler_t handlers[] = {
677 [RESOURCE_IN_WIM] = read_wim_resource_prefix,
678 [RESOURCE_IN_FILE_ON_DISK] = read_file_on_disk_prefix,
679 [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix,
681 [RESOURCE_IN_STAGING_FILE] = read_file_on_disk_prefix,
684 [RESOURCE_IN_NTFS_VOLUME] = read_ntfs_file_prefix,
687 [RESOURCE_WIN32] = read_win32_file_prefix,
688 [RESOURCE_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix,
691 wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
692 && handlers[lte->resource_location] != NULL);
693 return handlers[lte->resource_location](lte, size, cb, ctx_or_buf, flags);
697 read_full_resource_into_buf(const struct wim_lookup_table_entry *lte,
698 void *buf, bool thread_safe)
700 return read_resource_prefix(lte,
701 wim_resource_size(lte),
703 thread_safe ? WIMLIB_RESOURCE_FLAG_MULTITHREADED : 0);
706 /* Extracts the first @size bytes of a WIM resource to somewhere. In the
707 * process, the SHA1 message digest of the resource is checked if the full
708 * resource is being extracted.
710 * @extract_chunk is a function that is called to extract each chunk of the
713 extract_wim_resource(const struct wim_lookup_table_entry *lte,
715 consume_data_callback_t extract_chunk,
716 void *extract_chunk_arg)
718 return read_resource_prefix(lte, size, extract_chunk,
719 extract_chunk_arg, 0);
723 extract_wim_chunk_to_fd(const void *buf, size_t len, void *_fd_p)
725 int fd = *(int*)_fd_p;
726 ssize_t ret = full_write(fd, buf, len);
728 ERROR_WITH_ERRNO("Error writing to file descriptor");
729 return WIMLIB_ERR_WRITE;
736 extract_wim_resource_to_fd(const struct wim_lookup_table_entry *lte,
739 return extract_wim_resource(lte, size, extract_wim_chunk_to_fd, &fd);
743 * Copies the file resource specified by the lookup table entry @lte from the
744 * input WIM to the output WIM that has its FILE * given by
745 * ((WIMStruct*)wim)->out_fp.
747 * The output_resource_entry, out_refcnt, and part_number fields of @lte are
750 * (This function is confusing and should be refactored somehow.)
753 copy_resource(struct wim_lookup_table_entry *lte, void *wim)
758 ret = write_wim_resource(lte, w->out_fp,
759 wim_resource_compression_type(lte),
760 &lte->output_resource_entry, 0);
762 lte->out_refcnt = lte->refcnt;
763 lte->part_number = w->hdr.part_number;