4 * Read uncompressed and compressed metadata and file resources.
6 * Copyright (C) 2010 Carl Thijssen
7 * Copyright (C) 2012 Eric Biggers
9 * wimlib - Library for working with WIM files
11 * This library is free software; you can redistribute it and/or modify it under
12 * the terms of the GNU Lesser General Public License as published by the Free
13 * Software Foundation; either version 2.1 of the License, or (at your option) any
16 * This library is distributed in the hope that it will be useful, but WITHOUT ANY
17 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
18 * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License along
21 * with this library; if not, write to the Free Software Foundation, Inc., 59
22 * Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include "wimlib_internal.h"
34 * Reads all or part of a compressed resource into an in-memory buffer.
36 * @fp: The FILE* for the WIM file.
37 * @resource_compressed_size: The compressed size of the resource.
38 * @resource_uncompressed_size: The uncompressed size of the resource.
39 * @resource_offset: The offset of the start of the resource from
40 * the start of the stream @fp.
41 * @resource_ctype: The compression type of the resource.
42 * @len: The number of bytes of uncompressed data to read from
44 * @offset: The offset of the bytes to read within the uncompressed
46 * @contents_len: An array into which the uncompressed data is written.
47 * It must be at least @len bytes long.
49 * Returns zero on success, nonzero on failure.
51 static int read_compressed_resource(FILE *fp, u64 resource_compressed_size,
52 u64 resource_uncompressed_size,
53 u64 resource_offset, int resource_ctype,
54 u64 len, u64 offset, u8 contents_ret[])
57 DEBUG2("comp size = %"PRIu64", "
58 "uncomp size = %"PRIu64", "
59 "res offset = %"PRIu64"\n",
60 resource_compressed_size,
61 resource_uncompressed_size,
63 DEBUG2("resource_ctype = %s, len = %"PRIu64", offset = %"PRIu64"\n",
64 wimlib_get_compression_type_string(resource_ctype),
70 int (*decompress)(const void *, uint, void *, uint);
71 /* Set the appropriate decompress function. */
72 if (resource_ctype == WIM_COMPRESSION_TYPE_LZX)
73 decompress = lzx_decompress;
75 decompress = xpress_decompress;
77 /* The structure of a compressed resource consists of a table of chunk
78 * offsets followed by the chunks themselves. Each chunk consists of
79 * compressed data, and there is one chunk for each WIM_CHUNK_SIZE =
80 * 32768 bytes of the uncompressed file, with the last chunk having any
83 * The chunk offsets are measured relative to the end of the chunk
84 * table. The first chunk is omitted from the table in the WIM file
85 * because its offset is implicitly given by the fact that it directly
86 * follows the chunk table and therefore must have an offset of 0.
89 /* Calculate how many chunks the resource conists of in its entirety. */
90 u64 num_chunks = (resource_uncompressed_size + WIM_CHUNK_SIZE - 1) /
92 /* As mentioned, the first chunk has no entry in the chunk table. */
93 u64 num_chunk_entries = num_chunks - 1;
96 /* The index of the chunk that the read starts at. */
97 u64 start_chunk = offset / WIM_CHUNK_SIZE;
98 /* The byte offset at which the read starts, within the start chunk. */
99 u64 start_chunk_offset = offset % WIM_CHUNK_SIZE;
101 /* The index of the chunk that contains the last byte of the read. */
102 u64 end_chunk = (offset + len - 1) / WIM_CHUNK_SIZE;
103 /* The byte offset of the last byte of the read, within the end chunk */
104 u64 end_chunk_offset = (offset + len - 1) % WIM_CHUNK_SIZE;
106 /* Number of chunks that are actually needed to read the requested part
108 u64 num_needed_chunks = end_chunk - start_chunk + 1;
110 /* If the end chunk is not the last chunk, an extra chunk entry is
111 * needed because we need to know the offset of the chunk after the last
112 * chunk read to figure out the size of the last read chunk. */
113 if (end_chunk != num_chunks - 1)
116 /* Declare the chunk table. It will only contain offsets for the chunks
117 * that are actually needed for this read. */
118 u64 chunk_offsets[num_needed_chunks];
120 /* Set the implicit offset of the first chunk if it is included in the
123 * Note: M$'s documentation includes a picture that shows the first
124 * chunk starting right after the chunk entry table, labeled as offset
125 * 0x10. However, in the actual file format, the offset is measured
126 * from the end of the chunk entry table, so the first chunk has an
128 if (start_chunk == 0)
129 chunk_offsets[0] = 0;
131 /* According to M$'s documentation, if the uncompressed size of
132 * the file is greater than 4 GB, the chunk entries are 8-byte
133 * integers. Otherwise, they are 4-byte integers. */
134 u64 chunk_entry_size = (resource_uncompressed_size >= (u64)1 << 32) ?
137 /* Size of the full chunk table in the WIM file. */
138 u64 chunk_table_size = chunk_entry_size * num_chunk_entries;
140 /* Read the needed chunk offsets from the table in the WIM file. */
142 /* Index, in the WIM file, of the first needed entry in the
144 u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
146 /* Number of entries we need to actually read from the chunk
147 * table (excludes the implicit first chunk). */
148 u64 num_needed_chunk_entries = (start_chunk == 0) ?
149 num_needed_chunks - 1 : num_needed_chunks;
151 /* Skip over unneeded chunk table entries. */
152 u64 file_offset_of_needed_chunk_entries = resource_offset +
153 start_table_idx * chunk_entry_size;
154 if (fseeko(fp, file_offset_of_needed_chunk_entries, SEEK_SET) != 0) {
155 ERROR("Failed to seek to byte %"PRIu64" "
156 "to read chunk table of compressed "
158 file_offset_of_needed_chunk_entries);
159 return WIMLIB_ERR_READ;
162 /* Number of bytes we need to read from the chunk table. */
163 size_t size = num_needed_chunk_entries * chunk_entry_size;
165 u8 chunk_tab_buf[size];
167 if (fread(chunk_tab_buf, 1, size, fp) != size)
170 /* Now fill in chunk_offsets from the entries we have read in
173 u64 *chunk_tab_p = chunk_offsets;
174 if (start_chunk == 0)
177 if (chunk_entry_size == 4) {
178 u32 *entries = (u32*)chunk_tab_buf;
179 while (num_needed_chunk_entries--)
180 *chunk_tab_p++ = to_le32(*entries++);
182 u64 *entries = (u64*)chunk_tab_buf;
183 while (num_needed_chunk_entries--)
184 *chunk_tab_p++ = to_le64(*entries++);
187 /* Done with the chunk table now. We must now seek to the first chunk
188 * that is needed for the read. */
190 u64 file_offset_of_first_needed_chunk = resource_offset +
191 chunk_table_size + chunk_offsets[0];
192 if (fseeko(fp, file_offset_of_first_needed_chunk, SEEK_SET) != 0) {
193 ERROR("Failed to seek to byte %"PRIu64" "
194 "to read first chunk of compressed "
196 file_offset_of_first_needed_chunk);
197 return WIMLIB_ERR_READ;
200 /* Pointer to current position in the output buffer for uncompressed
202 u8 *out_p = (u8*)contents_ret;
204 /* Buffer for compressed data. While most compressed chunks will have a
205 * size much less than WIM_CHUNK_SIZE, WIM_CHUNK_SIZE - 1 is the maximum
206 * size in the worst-case. This assumption is valid only if chunks that
207 * happen to compress to more than the uncompressed size (i.e. a
208 * sequence of random bytes) are always stored uncompressed. But this seems
209 * to be the case in M$'s WIM files, even though it is undocumented. */
210 u8 compressed_buf[WIM_CHUNK_SIZE - 1];
213 /* Decompress all the chunks. */
214 for (u64 i = start_chunk; i <= end_chunk; i++) {
216 DEBUG2("Chunk %"PRIu64" (start %"PRIu64", end %"PRIu64")\n",
217 i, start_chunk, end_chunk);
219 /* Calculate the sizes of the compressed chunk and of the
220 * uncompressed chunk. */
221 uint compressed_chunk_size, uncompressed_chunk_size;
222 if (i != num_chunks - 1) {
223 /* All the chunks except the last one in the resource
224 * expand to WIM_CHUNK_SIZE uncompressed, and the amount
225 * of compressed data for the chunk is given by the
226 * difference of offsets in the chunk offset table. */
227 compressed_chunk_size = chunk_offsets[i + 1 - start_chunk] -
228 chunk_offsets[i - start_chunk];
229 uncompressed_chunk_size = WIM_CHUNK_SIZE;
231 /* The last compressed chunk consists of the remaining
232 * bytes in the file resource, and the last uncompressed
233 * chunk has size equal to however many bytes are left-
234 * that is, the remainder of the uncompressed size when
235 * divided by WIM_CHUNK_SIZE.
237 * Note that the resource_compressed_size includes the
238 * chunk table, so the size of it must be subtracted. */
239 compressed_chunk_size = resource_compressed_size -
241 chunk_offsets[i - start_chunk];
243 uncompressed_chunk_size = resource_uncompressed_size %
246 /* If the remainder is 0, the last chunk actually
247 * uncompresses to a full WIM_CHUNK_SIZE bytes. */
248 if (uncompressed_chunk_size == 0)
249 uncompressed_chunk_size = WIM_CHUNK_SIZE;
252 DEBUG2("compressed_chunk_size = %u, uncompressed_chunk_size = %u\n",
253 compressed_chunk_size, uncompressed_chunk_size);
256 /* Figure out how much of this chunk we actually need to read */
258 if (i == start_chunk)
259 start_offset = start_chunk_offset;
264 end_offset = end_chunk_offset;
266 end_offset = WIM_CHUNK_SIZE - 1;
268 u64 partial_chunk_size = end_offset + 1 - start_offset;
269 bool is_partial_chunk = (partial_chunk_size !=
270 uncompressed_chunk_size);
272 DEBUG2("start_offset = %u, end_offset = %u\n", start_offset,
274 DEBUG2("partial_chunk_size = %u\n", partial_chunk_size);
276 /* This is undocumented, but chunks can be uncompressed. This
277 * appears to always be the case when the compressed chunk size
278 * is equal to the uncompressed chunk size. */
279 if (compressed_chunk_size == uncompressed_chunk_size) {
280 /* Probably an uncompressed chunk */
282 if (start_offset != 0) {
283 if (fseeko(fp, start_offset, SEEK_CUR) != 0) {
284 ERROR("Uncompressed partial chunk "
285 "fseek() error: %m\n");
286 return WIMLIB_ERR_READ;
289 if (fread(out_p, 1, partial_chunk_size, fp) !=
293 /* Compressed chunk */
296 /* Read the compressed data into compressed_buf. */
297 if (fread(compressed_buf, 1, compressed_chunk_size,
298 fp) != compressed_chunk_size)
301 /* For partial chunks we must buffer the uncompressed
302 * data because we don't need all of it. */
303 if (is_partial_chunk) {
304 u8 uncompressed_buf[uncompressed_chunk_size];
306 ret = decompress(compressed_buf,
307 compressed_chunk_size,
309 uncompressed_chunk_size);
311 return WIMLIB_ERR_DECOMPRESSION;
312 memcpy(out_p, uncompressed_buf + start_offset,
315 DEBUG2("out_p = %p\n");
316 ret = decompress(compressed_buf,
317 compressed_chunk_size,
319 uncompressed_chunk_size);
321 return WIMLIB_ERR_DECOMPRESSION;
325 /* Advance the pointer into the uncompressed output data by the
326 * number of uncompressed bytes that were written. */
327 out_p += partial_chunk_size;
334 ERROR("Unexpected EOF in compressed file resource\n");
336 ERROR("Error reading compressed file resource: %m\n");
337 return WIMLIB_ERR_READ;
341 * Reads uncompressed data from an open file stream.
343 int read_uncompressed_resource(FILE *fp, u64 offset, u64 len,
346 if (fseeko(fp, offset, SEEK_SET) != 0) {
347 ERROR("Failed to seek to byte %"PRIu64" of input file "
348 "to read uncompressed resource "
349 "(len = %"PRIu64")!\n", offset, len);
350 return WIMLIB_ERR_READ;
352 if (fread(contents_ret, 1, len, fp) != len) {
354 ERROR("Unexpected EOF in uncompressed file resource!\n");
356 ERROR("Failed to read %"PRIu64" bytes from "
357 "uncompressed resource at offset "
358 "%"PRIu64"\n", len, offset);
360 return WIMLIB_ERR_READ;
366 * Reads a WIM resource.
368 * @fp: The FILE* for the WIM file.
369 * @resource_size: The compressed size of the resource.
370 * @resource_original_size: The uncompressed size of the resource.
371 * @resource_offset: The offset of the resource in the stream @fp.
372 * @resource_ctype: The compression type of the resource.
373 * (WIM_COMPRESSION_TYPE_*)
374 * @len: How many bytes of the resource should be read.
375 * @offset: The offset within the resource at which the read
378 * To read the whole file resource, specify offset =
379 * 0 and len = resource_original_size, or call
380 * read_full_resource().
382 * @contents_ret: An array, that must have length at least @len,
383 * into which the uncompressed contents of
384 * the file resource starting at @offset and
385 * continuing for @len bytes will be written.
387 * @return: Zero on success, nonzero on failure. Failure may be due to
388 * being unable to read the data from the WIM file at the
389 * specified length and offset, or it may be due to the
390 * compressed data (if the data is compressed) being
393 int read_resource(FILE *fp, u64 resource_size, u64 resource_original_size,
394 u64 resource_offset, int resource_ctype, u64 len,
395 u64 offset, void *contents_ret)
397 if (resource_ctype == WIM_COMPRESSION_TYPE_NONE) {
398 if (resource_size != resource_original_size) {
399 ERROR("Resource with original size %"PRIu64" "
400 "bytes is marked as uncompressed, \n",
401 resource_original_size);
402 ERROR(" but its actual size is %"PRIu64" "
405 return WIMLIB_ERR_INVALID_RESOURCE_SIZE;
407 return read_uncompressed_resource(fp,
408 resource_offset + offset,
411 return read_compressed_resource(fp, resource_size,
412 resource_original_size, resource_offset,
413 resource_ctype, len, offset, contents_ret);
419 * Extracts the first @size bytes file resource specified by @entry to the open
420 * file @fd. Returns nonzero on error.
423 * This function is somewhat redundant with uncompress_resource(). The
424 * main difference is that this function writes to a file descriptor using
425 * low-level calls to write() rather than to a FILE* with fwrite(); also this
426 * function allows only up to @size bytes to be extracted.
428 int extract_resource_to_fd(WIMStruct *w, const struct resource_entry *entry,
433 u8 buf[min(size, WIM_CHUNK_SIZE)];
441 num_chunks = (size + WIM_CHUNK_SIZE - 1) / WIM_CHUNK_SIZE;
443 res_ctype = wim_resource_compression_type(w, entry);
445 for (i = 0; i < num_chunks; i++) {
446 if (i == num_chunks - 1) {
447 n = size % WIM_CHUNK_SIZE;
453 ret = read_resource(w->fp, entry->size, entry->original_size,
454 entry->offset, res_ctype, n, offset, buf);
458 if (full_write(fd, buf, n) != n)
459 return WIMLIB_ERR_WRITE;
465 /* Reads the contents of a struct resource_entry, as represented in the on-disk
466 * format, from the memory pointed to by @p, and fills in the fields of @entry.
467 * A pointer to the byte after the memory read at @p is returned. */
468 const u8 *get_resource_entry(const u8 *p, struct resource_entry *entry)
473 p = get_u56(p, &size);
474 p = get_u8(p, &flags);
476 entry->flags = flags;
477 p = get_u64(p, &entry->offset);
478 p = get_u64(p, &entry->original_size);
482 /* Copies the struct resource_entry @entry to the memory pointed to by @p in the
483 * on-disk format. A pointer to the byte after the memory written at @p is
485 u8 *put_resource_entry(u8 *p, const struct resource_entry *entry)
487 p = put_u56(p, entry->size);
488 p = put_u8(p, entry->flags);
489 p = put_u64(p, entry->offset);
490 p = put_u64(p, entry->original_size);
494 /* Given the compression type for the WIM file as a whole as the flags field of
495 * a resource entry, returns the compression type for that resource entry. */
496 int resource_compression_type(int wim_ctype, int reshdr_flags)
498 if (wim_ctype == WIM_COMPRESSION_TYPE_NONE) {
499 return WIM_COMPRESSION_TYPE_NONE;
501 if (reshdr_flags & WIM_RESHDR_FLAG_COMPRESSED)
504 return WIM_COMPRESSION_TYPE_NONE;
509 * Reads the metadata resource from the WIM file. The metadata
510 * resource consists of the security data, followed by the directory entry for
511 * the root directory, followed by all the other directory entries in the
512 * filesystem. The subdir_offset field of each directory entry gives the start
513 * of its child entries from the beginning of the metadata resource. An
514 * end-of-directory is signaled by a directory entry of length '0', really of
515 * length 8, because that's how long the 'length' field is.
517 * @fp: The FILE* for the input WIM file.
518 * @res_entry: The resource entry for the metadata resource (a.k.a the metadata
520 * @wim_ctype: The compression type of the WIM file.
521 * @root_dentry_p: A pointer to a pointer to a struct dentry structure into which the
522 * root dentry is allocated and returned.
524 * @return: Zero on success, nonzero (a WIMLIB_ERR_* code) on failure.
526 int read_metadata_resource(FILE *fp, const struct resource_entry *res_entry,
527 int wim_ctype, struct dentry **root_dentry_p)
533 struct dentry *dentry;
535 DEBUG("Reading metadata resource: length = %lu, offset = %lu\n",
536 res_entry->original_size, res_entry->offset);
538 if (res_entry->original_size < 8) {
539 ERROR("Expected at least 8 bytes for the metadata "
541 return WIMLIB_ERR_INVALID_RESOURCE_SIZE;
544 /* Allocate memory for the uncompressed metadata resource. */
545 buf = MALLOC(res_entry->original_size);
548 ERROR("Failed to allocate %"PRIu64" bytes for uncompressed "
549 "metadata resource!\n",
550 res_entry->original_size);
551 return WIMLIB_ERR_NOMEM;
554 /* Determine the compression type of the metadata resource. */
555 ctype = resource_compression_type(wim_ctype, res_entry->flags);
557 /* Read the metadata resource into memory. (It may be compressed.) */
558 ret = read_full_resource(fp, res_entry->size,
559 res_entry->original_size, res_entry->offset,
564 DEBUG("Finished reading metadata resource into memory.\n");
567 /* Read the security data into a WIMSecurityData structure. */
568 if (!read_security_data(buf, res_entry->original_size, sd))
572 dentry = MALLOC(sizeof(struct dentry));
574 ERROR("Failed to allocate %zu bytes for root dentry!\n",
575 sizeof(struct dentry));
576 ret = WIMLIB_ERR_NOMEM;
580 /* Read the root directory entry starts after security data, on an
581 * 8-byte aligned address.
583 * The security data starts with a 4-byte integer giving its total
585 get_u32(buf, &dentry_offset);
586 dentry_offset += (8 - dentry_offset % 8) % 8;
588 ret = read_dentry(buf, res_entry->original_size, dentry_offset, dentry);
592 /* This is the root dentry, so set its pointers correctly. */
593 dentry->parent = dentry;
594 dentry->next = dentry;
595 dentry->prev = dentry;
597 /* Now read the entire directory entry tree. */
598 ret = read_dentry_tree(buf, res_entry->original_size, dentry);
602 /* Calculate the full paths in the dentry tree. */
603 ret = for_dentry_in_tree(dentry, calculate_dentry_full_path, NULL);
607 *root_dentry_p = dentry;
611 free_dentry_tree(dentry, NULL, false);