4 * Support for extracting WIM files.
6 * This code does NOT contain any filesystem-specific features. In particular,
7 * security information (i.e. file permissions) and alternate data streams are
8 * ignored, except possibly to read an alternate data stream that contains
13 * Copyright (C) 2012 Eric Biggers
15 * This file is part of wimlib, a library for working with WIM files.
17 * wimlib is free software; you can redistribute it and/or modify it under the
18 * terms of the GNU General Public License as published by the Free
19 * Software Foundation; either version 3 of the License, or (at your option)
22 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
23 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
24 * A PARTICULAR PURPOSE. See the GNU General Public License for more
27 * You should have received a copy of the GNU General Public License
28 * along with wimlib; if not, see http://www.gnu.org/licenses/.
48 #include "lookup_table.h"
49 #include "timestamp.h"
50 #include "wimlib_internal.h"
54 #include <ntfs-3g/volume.h>
/*
 * Handles a regular file whose stream has already been extracted once
 * (lte->extracted_file is set) by linking @output_path to that earlier copy
 * instead of writing the data again.
 *
 * When WIMLIB_EXTRACT_FLAG_HARDLINK is set a hard link is created with link();
 * the remaining visible code builds a link target in a stack buffer and calls
 * symlink().  Returns WIMLIB_ERR_LINK if link() or symlink() fails.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing (braces, some declarations, the body of the path-building loop and
 * the final return), so the comments describe only the visible statements.
 */
57 static int extract_regular_file_linked(struct wim_dentry *dentry,
58 const char *output_path,
59 struct apply_args *args,
60 struct wim_lookup_table_entry *lte)
62 /* This mode overrides the normal hard-link extraction and
63 * instead either symlinks or hardlinks *all* identical files in
64 * the WIM, even if they are in a different image (in the case
65 * of a multi-image extraction) */
67 if (args->extract_flags & WIMLIB_EXTRACT_FLAG_HARDLINK) {
68 if (link(lte->extracted_file, output_path) != 0) {
69 ERROR_WITH_ERRNO("Failed to hard link "
71 output_path, lte->extracted_file);
72 return WIMLIB_ERR_LINK;
/* Symbolic link case (presumably the else branch; the else line itself is
 * not visible here). */
75 int num_path_components;
76 int num_output_dir_path_components;
77 size_t extracted_file_len;
/* Number of path components in the dentry's path, minus one, and the number
 * of path components in the extraction target. */
83 get_num_path_components(dentry->full_path_utf8) - 1;
84 num_output_dir_path_components =
85 get_num_path_components(args->target);
/* In a multi-image extraction the counts are shifted by one component.
 * NOTE(review): presumably this accounts for the per-image subdirectory that
 * is appended to the target; confirm against the caller. */
87 if (args->extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE) {
88 num_path_components++;
89 num_output_dir_path_components--;
91 extracted_file_len = strlen(lte->extracted_file);
/* The buffer reserves 3 bytes per path component in addition to the length
 * of the already-extracted path and a terminator.  NOTE(review): the 3 bytes
 * presumably hold one parent-directory prefix per component, written by the
 * loop below whose body is not visible; confirm. */
93 char buf[extracted_file_len + 3 * num_path_components + 1];
96 for (i = 0; i < num_path_components; i++) {
/* Advance p2 past num_output_dir_path_components parts of the
 * already-extracted path (path_next_part() is defined elsewhere). */
101 p2 = lte->extracted_file;
104 while (num_output_dir_path_components--)
105 p2 = path_next_part(p2, NULL);
/* Create the symbolic link at output_path with buf as its target. */
107 if (symlink(buf, output_path) != 0) {
108 ERROR_WITH_ERRNO("Failed to symlink `%s' to "
110 buf, lte->extracted_file);
111 return WIMLIB_ERR_LINK;
/*
 * Extracts a regular file to @output_path in the normal way.
 *
 * If the inode has more than one link and another dentry of the same inode
 * was already extracted (inode->i_extracted_file is set), a hard link to that
 * file is created instead of writing the data again.  Otherwise the file is
 * created with open() and the stream @lte is written to it with
 * extract_wim_resource_to_fd(); the size of the stream is then added to
 * args->progress.extract.completed_bytes.
 *
 * Error codes visible in this listing: WIMLIB_ERR_LINK, WIMLIB_ERR_NOMEM,
 * WIMLIB_ERR_OPEN and WIMLIB_ERR_WRITE (the latter when close() fails).
 *
 * NOTE(review): some lines of this function (braces, a few conditions and
 * the final return) are not visible in this listing.
 */
117 static int extract_regular_file_unlinked(struct wim_dentry *dentry,
118 struct apply_args *args,
119 const char *output_path,
120 struct wim_lookup_table_entry *lte)
122 /* Normal mode of extraction. Regular files and hard links are
123 * extracted in the way that they appear in the WIM. */
127 struct wim_inode *inode = dentry->d_inode;
/* The per-inode hard link handling is used unless this is a multi-image
 * extraction in symlink or hardlink mode. */
129 if (!((args->extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE)
130 && (args->extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
131 WIMLIB_EXTRACT_FLAG_HARDLINK))))
133 /* If the dentry is part of a hard link set of at least 2
134 * dentries and one of the other dentries has already been
135 * extracted, make a hard link to the file corresponding to this
136 * already-extracted dentry. Otherwise, extract the file and
137 * set the inode->i_extracted_file field so that other dentries
138 * in the hard link group can link to it. */
139 if (inode->i_nlink > 1) {
140 if (inode->i_extracted_file) {
141 DEBUG("Extracting hard link `%s' => `%s'",
142 output_path, inode->i_extracted_file);
143 if (link(inode->i_extracted_file, output_path) != 0) {
144 ERROR_WITH_ERRNO("Failed to hard link "
147 inode->i_extracted_file);
148 return WIMLIB_ERR_LINK;
/* Remember this path (replacing any previous value) so that later dentries
 * of the same inode can link to it. */
152 FREE(inode->i_extracted_file);
153 inode->i_extracted_file = STRDUP(output_path);
154 if (!inode->i_extracted_file) {
155 ERROR("Failed to allocate memory for filename");
156 return WIMLIB_ERR_NOMEM;
161 /* Extract the contents of the file to @output_path. */
/* The file is created, or truncated if it exists, with mode 0644. */
163 out_fd = open(output_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
165 ERROR_WITH_ERRNO("Failed to open the file `%s' for writing",
167 return WIMLIB_ERR_OPEN;
171 /* Empty file with no lookup table entry */
172 DEBUG("Empty file `%s'.", output_path);
/* Write the whole resource to the open file descriptor. */
177 ret = extract_wim_resource_to_fd(lte, out_fd, wim_resource_size(lte));
179 ERROR("Failed to extract resource to `%s'", output_path);
182 args->progress.extract.completed_bytes += wim_resource_size(lte);
/* A failing close() is reported as a write error. */
184 if (close(out_fd) != 0) {
185 ERROR_WITH_ERRNO("Failed to close file `%s'", output_path);
186 ret = WIMLIB_ERR_WRITE;
/*
 * Extracts the regular file described by @dentry to @output_path.
 *
 * The unnamed stream of the inode is looked up first.  In symlink or hardlink
 * extraction mode, a stream that was already extracted somewhere
 * (lte->extracted_file is set) is handled by extract_regular_file_linked();
 * otherwise @output_path is duplicated into lte->extracted_file
 * (WIMLIB_ERR_NOMEM if the duplication fails).  In all remaining cases the
 * file is extracted by extract_regular_file_unlinked().
 */
191 static int extract_regular_file(struct wim_dentry *dentry,
192 struct apply_args *args,
193 const char *output_path)
195 struct wim_lookup_table_entry *lte;
196 const struct wim_inode *inode = dentry->d_inode;
/* lte may be NULL; the link modes below are only considered when it is set. */
198 lte = inode_unnamed_lte_resolved(inode);
200 if (lte && (args->extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
201 WIMLIB_EXTRACT_FLAG_HARDLINK)))
203 if (lte->extracted_file) {
204 return extract_regular_file_linked(dentry, output_path, args, lte);
/* First extraction of this stream: record where it is being extracted. */
206 lte->extracted_file = STRDUP(output_path);
207 if (!lte->extracted_file)
208 return WIMLIB_ERR_NOMEM;
211 return extract_regular_file_unlinked(dentry, args, output_path, lte);
/*
 * Extracts a symbolic link: the link target is read from the inode with
 * inode_readlink() and a symbolic link pointing to it is created at
 * @output_path with symlink().  Afterwards the size of the inode's unnamed
 * stream is added to args->progress.extract.completed_bytes.
 *
 * Returns WIMLIB_ERR_INVALID_DENTRY when the target cannot be read and
 * WIMLIB_ERR_LINK when symlink() fails.
 *
 * NOTE(review): the declaration of target, the conditions guarding the two
 * error paths and the final return are not visible in this listing.
 */
214 static int extract_symlink(struct wim_dentry *dentry,
215 struct apply_args *args,
216 const char *output_path)
219 ssize_t ret = inode_readlink(dentry->d_inode, target,
220 sizeof(target), args->w, 0);
221 struct wim_lookup_table_entry *lte;
224 ERROR("Could not read the symbolic link from dentry `%s'",
225 dentry->full_path_utf8);
226 return WIMLIB_ERR_INVALID_DENTRY;
228 ret = symlink(target, output_path);
230 ERROR_WITH_ERRNO("Failed to symlink `%s' to `%s'",
231 output_path, target);
232 return WIMLIB_ERR_LINK;
/* Account for the link data in the progress counters. */
234 lte = inode_unnamed_lte_resolved(dentry->d_inode);
235 args->progress.extract.completed_bytes += wim_resource_size(lte);
/*
 * Makes sure a directory exists at @output_path.
 *
 * The path is checked with stat() first.  An existing directory is accepted
 * (a warning for that case is commented out); an existing non-directory is
 * an error (WIMLIB_ERR_MKDIR).  A stat() failure with an errno other than
 * ENOENT yields WIMLIB_ERR_STAT.  Otherwise the directory is created with
 * mkdir() using permissions rwxr-xr-x, and WIMLIB_ERR_MKDIR is returned if
 * that fails.
 *
 * NOTE(review): the use of @is_root is not visible in this listing, nor are
 * some braces, conditions and return statements.
 */
239 static int extract_directory(const char *output_path, bool is_root)
243 ret = stat(output_path, &stbuf);
245 if (S_ISDIR(stbuf.st_mode)) {
247 /*WARNING("`%s' already exists", output_path);*/
250 ERROR("`%s' is not a directory", output_path);
251 return WIMLIB_ERR_MKDIR;
/* Only a missing path is acceptable at this point. */
254 if (errno != ENOENT) {
255 ERROR_WITH_ERRNO("Failed to stat `%s'", output_path);
256 return WIMLIB_ERR_STAT;
259 if (mkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP |
260 S_IROTH | S_IXOTH) != 0) {
261 ERROR_WITH_ERRNO("Cannot create directory `%s'",
263 return WIMLIB_ERR_MKDIR;
268 /* Extracts a file, directory, or symbolic link from the WIM archive. */
/*
 * @arg is the struct apply_args for the current extraction.  The output path
 * is args->target immediately followed by the dentry's full path, assembled
 * in a stack buffer; the dentry is then handed to extract_symlink(),
 * extract_directory() or extract_regular_file() depending on the inode type,
 * and that function's return value is returned.
 */
269 static int apply_dentry_normal(struct wim_dentry *dentry, void *arg)
271 struct apply_args *args = arg;
272 struct wim_inode *inode = dentry->d_inode;
/* Build the NUL-terminated output path: target followed by the full path. */
275 len = strlen(args->target);
276 char output_path[len + dentry->full_path_utf8_len + 1];
277 memcpy(output_path, args->target, len);
278 memcpy(output_path + len, dentry->full_path_utf8, dentry->full_path_utf8_len);
279 output_path[len + dentry->full_path_utf8_len] = '\0';
/* Dispatch on the kind of inode. */
281 if (inode_is_symlink(inode))
282 return extract_symlink(dentry, args, output_path);
283 else if (inode_is_directory(inode))
284 return extract_directory(output_path, false);
286 return extract_regular_file(dentry, args, output_path);
289 /* Apply timestamps to an extracted file or directory */
/*
 * @arg is the struct apply_args for the current extraction.  The output path
 * is rebuilt as args->target followed by the dentry's full path, and the
 * inode's last access and last write times are applied with lutimes().  If
 * lutimes() reports ENOSYS, utime() is tried instead.  The counter
 * args->num_lutimes_warnings is incremented for failures other than ENOSYS,
 * or while fewer than 10 warnings have been counted; the warning message
 * itself is commented out.
 *
 * NOTE(review): the return statements and some conditions are not visible in
 * this listing, so the value returned on failure cannot be stated from here.
 */
290 static int apply_dentry_timestamps_normal(struct wim_dentry *dentry, void *arg)
292 struct apply_args *args = arg;
293 size_t len = strlen(args->target);
294 char output_path[len + dentry->full_path_utf8_len + 1];
295 const struct wim_inode *inode = dentry->d_inode;
/* Build the NUL-terminated output path: target followed by the full path. */
298 memcpy(output_path, args->target, len);
299 memcpy(output_path + len, dentry->full_path_utf8, dentry->full_path_utf8_len);
300 output_path[len + dentry->full_path_utf8_len] = '\0';
302 /* Convert the WIM timestamps, which are accurate to 100 nanoseconds,
303 * into struct timeval's. */
/* tv[0] carries the access time and tv[1] the last write time. */
304 struct timeval tv[2];
305 wim_timestamp_to_timeval(inode->i_last_access_time, &tv[0]);
306 wim_timestamp_to_timeval(inode->i_last_write_time, &tv[1]);
308 ret = lutimes(output_path, tv);
/* Fallback for systems where lutimes() is not implemented. */
315 if (errno == ENOSYS) {
317 buf.actime = wim_timestamp_to_unix(inode->i_last_access_time);
318 buf.modtime = wim_timestamp_to_unix(inode->i_last_write_time);
319 if (utime(output_path, &buf) == 0)
323 if (errno != ENOSYS || args->num_lutimes_warnings < 10) {
324 /*WARNING("Failed to set timestamp on file `%s': %s",*/
325 /*output_path, strerror(errno));*/
326 args->num_lutimes_warnings++;
332 /* Extract a dentry if it hasn't already been extracted, and either the dentry
333 * has no streams or WIMLIB_EXTRACT_FLAG_NO_STREAMS is not specified. */
/*
 * @arg is the struct apply_args for the current extraction.  When verbose
 * extraction is requested and a progress function is set, the progress
 * function is called with WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY and the
 * dentry's path before the dentry is applied through args->apply_dentry.
 * The dentry is marked with is_extracted = 1 afterwards.
 *
 * NOTE(review): the early-return statements and the condition guarding the
 * is_extracted assignment are not visible in this listing.
 */
334 static int maybe_apply_dentry(struct wim_dentry *dentry, void *arg)
336 struct apply_args *args = arg;
/* Already handled by an earlier pass. */
339 if (dentry->is_extracted)
/* In the no-streams pass, dentries that have an unnamed stream are left for
 * the later stream-ordered pass. */
342 if (args->extract_flags & WIMLIB_EXTRACT_FLAG_NO_STREAMS)
343 if (inode_unnamed_lte_resolved(dentry->d_inode))
346 if ((args->extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) &&
347 args->progress_func) {
348 args->progress.extract.cur_path = dentry->full_path_utf8;
349 args->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY,
352 ret = args->apply_dentry(dentry, args);
354 dentry->is_extracted = 1;
/*
 * qsort() comparison callback.  @p1 and @p2 point to elements of an array of
 * pointers to struct wim_lookup_table_entry; the entries are compared by
 * their resource_entry.offset.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * Presumably a negative, positive or zero value is returned for a smaller,
 * larger or equal offset respectively; confirm.
 */
358 static int cmp_streams_by_wim_position(const void *p1, const void *p2)
360 const struct wim_lookup_table_entry *lte1, *lte2;
361 lte1 = *(const struct wim_lookup_table_entry**)p1;
362 lte2 = *(const struct wim_lookup_table_entry**)p2;
363 if (lte1->resource_entry.offset < lte2->resource_entry.offset)
365 else if (lte1->resource_entry.offset > lte2->resource_entry.offset)
/*
 * Reorders @stream_list, whose entries are linked through their staging_list
 * member, using cmp_streams_by_wim_position().
 *
 * The list is walked once to size a temporary array of entry pointers, the
 * pointers are copied into the array and sorted with qsort(), and the list
 * is then rebuilt from the sorted array.  Returns WIMLIB_ERR_NOMEM if the
 * temporary array cannot be allocated.
 *
 * NOTE(review): the counting statement, the advance of cur inside the copy
 * loop, the release of the array and the final return are not visible in
 * this listing.
 */
371 static int sort_stream_list_by_wim_position(struct list_head *stream_list)
373 struct list_head *cur;
375 struct wim_lookup_table_entry **array;
380 list_for_each(cur, stream_list)
382 array_size = num_streams * sizeof(array[0]);
383 array = MALLOC(array_size);
385 ERROR("Failed to allocate %zu bytes to sort stream entries",
387 return WIMLIB_ERR_NOMEM;
/* Copy the entry pointers out of the list, in list order. */
389 cur = stream_list->next;
390 for (i = 0; i < num_streams; i++) {
391 array[i] = container_of(cur, struct wim_lookup_table_entry, staging_list);
395 qsort(array, num_streams, sizeof(array[0]), cmp_streams_by_wim_position);
/* Rebuild the list from scratch in sorted order. */
397 INIT_LIST_HEAD(stream_list);
398 for (i = 0; i < num_streams; i++)
399 list_add_tail(&array[i]->staging_list, stream_list);
/*
 * Fills in progress->extract.num_streams and progress->extract.total_bytes
 * for the streams in @stream_list and resets
 * progress->extract.completed_bytes to 0.
 *
 * In symlink or hardlink mode a stream contributes its size once, and only
 * if it has not been extracted yet (lte->extracted_file is NULL).  In the
 * other visible branch a stream contributes once per reference
 * (lte->out_refcnt).
 *
 * NOTE(review): one parameter line, the local counters' declarations and
 * some braces are not visible in this listing.
 */
404 static void calculate_bytes_to_extract(struct list_head *stream_list,
406 union wimlib_progress_info *progress)
408 struct wim_lookup_table_entry *lte;
412 /* For each stream to be extracted... */
413 list_for_each_entry(lte, stream_list, staging_list) {
415 (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
417 /* In the symlink or hard link extraction mode, each
418 * stream will be extracted one time regardless of how
419 * many dentries share the stream. */
420 wimlib_assert(!(extract_flags & WIMLIB_EXTRACT_FLAG_NTFS));
421 if (!lte->extracted_file) {
423 total_bytes += wim_resource_size(lte);
/* Otherwise every reference to the stream is extracted separately. */
426 num_streams += lte->out_refcnt;
427 total_bytes += lte->out_refcnt * wim_resource_size(lte);
430 progress->extract.num_streams = num_streams;
431 progress->extract.total_bytes = total_bytes;
432 progress->extract.completed_bytes = 0;
435 static void maybe_add_stream_for_extraction(struct wim_lookup_table_entry *lte,
436 struct list_head *stream_list)
438 if (++lte->out_refcnt == 1) {
439 INIT_LIST_HEAD(<e->inode_list);
440 list_add_tail(<e->staging_list, stream_list);
/*
 * Registers the streams of @inode that need to be extracted.
 *
 * The unnamed stream is passed to maybe_add_stream_for_extraction() and the
 * inode is linked (through i_lte_inode_list) onto that stream's inode list.
 * When WIMLIB_EXTRACT_FLAG_NTFS is set, the streams of the inode's named
 * alternate data stream entries (stream_name_len != 0) are registered too.
 *
 * NOTE(review): this listing is incomplete: a parameter line (the code uses
 * extract_flags), the checks for a NULL lte, the updates of inode_added and
 * several braces are not visible.  Also, the text "<e->inode_list" below is
 * not valid C; it looks like a corrupted "&lte->inode_list" and should be
 * restored from the pristine source.
 */
444 static void inode_find_streams_for_extraction(struct wim_inode *inode,
445 struct list_head *stream_list,
448 struct wim_lookup_table_entry *lte;
/* Tracks whether the inode has already been put on a stream's inode list. */
449 bool inode_added = false;
451 lte = inode_unnamed_lte_resolved(inode);
453 maybe_add_stream_for_extraction(lte, stream_list);
454 list_add_tail(&inode->i_lte_inode_list, <e->inode_list);
/* Named data streams are only considered when applying to an NTFS volume. */
458 if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
459 for (unsigned i = 0; i < inode->i_num_ads; i++) {
460 if (inode->i_ads_entries[i].stream_name_len != 0) {
461 lte = inode->i_ads_entries[i].lte;
463 maybe_add_stream_for_extraction(lte,
466 list_add_tail(&inode->i_lte_inode_list,
/*
 * Builds @stream_list, the list of streams that must be extracted for the
 * inodes in @inode_list.
 *
 * lte_zero_out_refcnt is first applied to every entry of @lookup_table and
 * @stream_list is initialized to empty.  Then, for each inode: its streams
 * are resolved against @lookup_table if that has not been done yet, the
 * is_extracted flag of each of its dentries is cleared, and its streams are
 * registered with inode_find_streams_for_extraction().
 *
 * NOTE(review): one parameter line and the closing lines of the function are
 * not visible in this listing.
 */
477 static void find_streams_for_extraction(struct hlist_head *inode_list,
478 struct list_head *stream_list,
479 struct wim_lookup_table *lookup_table,
482 struct wim_inode *inode;
483 struct hlist_node *cur;
484 struct wim_dentry *dentry;
486 for_lookup_table_entry(lookup_table, lte_zero_out_refcnt, NULL);
487 INIT_LIST_HEAD(stream_list);
488 hlist_for_each_entry(inode, cur, inode_list, i_hlist) {
489 if (!inode->i_resolved)
490 inode_resolve_ltes(inode, lookup_table);
/* Start from a clean state: nothing has been extracted yet. */
491 inode_for_each_dentry(dentry, inode)
492 dentry->is_extracted = 0;
493 inode_find_streams_for_extraction(inode, stream_list,
/*
 * Pair of callbacks that implement extraction for one kind of target.  Both
 * take a dentry and an opaque argument (a struct apply_args in the callers
 * visible in this file) and return an int status.
 */
498 struct apply_operations {
/* Extracts a single dentry. */
499 int (*apply_dentry)(struct wim_dentry *dentry, void *arg);
/* Applies the timestamps of a single, already extracted dentry. */
500 int (*apply_dentry_timestamps)(struct wim_dentry *dentry, void *arg);
/* Callbacks used when extracting to an ordinary directory; selected in
 * extract_single_image() when WIMLIB_EXTRACT_FLAG_NTFS is not in effect. */
503 static const struct apply_operations normal_apply_operations = {
504 .apply_dentry = apply_dentry_normal,
505 .apply_dentry_timestamps = apply_dentry_timestamps_normal,
/* Callbacks used when applying an image directly to an NTFS volume
 * (WIMLIB_EXTRACT_FLAG_NTFS); the two functions are defined elsewhere. */
509 static const struct apply_operations ntfs_apply_operations = {
510 .apply_dentry = apply_dentry_ntfs,
511 .apply_dentry_timestamps = apply_dentry_timestamps_ntfs,
/*
 * Extracts the dentries that reference the streams in @stream_list, visiting
 * the streams in list order.
 *
 * For every stream, every inode on the stream's inode list and every dentry
 * of that inode is passed to maybe_apply_dentry().  Between dentries,
 * @progress_func is called with WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS once
 * args->progress.extract.completed_bytes has reached the next threshold;
 * thresholds are spaced total_bytes / 100 apart and capped at total_bytes,
 * and no further threshold is set once everything has been extracted.
 *
 * NOTE(review): this listing is incomplete: error handling after
 * maybe_apply_dentry(), parts of the progress conditions, the closing braces
 * and the final return are not visible, and the use of @ops cannot be seen.
 * Also, the text "<e->inode_list" in the inner loop is not valid C; it looks
 * like a corrupted "&lte->inode_list" and should be restored from the
 * pristine source.
 */
515 static int apply_stream_list(struct list_head *stream_list,
516 struct apply_args *args,
517 const struct apply_operations *ops,
518 wimlib_progress_func_t progress_func)
/* Progress is reported roughly once per percent of the total byte count. */
520 uint64_t bytes_per_progress = args->progress.extract.total_bytes / 100;
521 uint64_t next_progress = bytes_per_progress;
522 struct wim_lookup_table_entry *lte;
523 struct wim_inode *inode;
524 struct wim_dentry *dentry;
527 /* This complicated loop is essentially looping through the dentries,
528 * although dentries may be visited more than once (if a dentry contains
529 * two different nonempty streams) or not at all (if a dentry contains
530 * no non-empty streams).
532 * The outer loop is over the distinct streams to be extracted so that
533 * sequential reading of the WIM can be implemented. */
535 /* For each distinct stream to be extracted */
536 list_for_each_entry(lte, stream_list, staging_list) {
537 /* For each inode that contains the stream */
538 list_for_each_entry(inode, <e->inode_list, i_lte_inode_list) {
539 /* For each dentry that points to the inode */
540 inode_for_each_dentry(dentry, inode) {
541 /* Extract the dentry if it was not already
543 ret = maybe_apply_dentry(dentry, args);
547 args->progress.extract.completed_bytes >= next_progress)
549 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS,
551 if (args->progress.extract.completed_bytes >=
552 args->progress.extract.total_bytes)
554 next_progress = ~0ULL;
557 min (args->progress.extract.completed_bytes +
559 args->progress.extract.total_bytes);
/*
 * Extracts one image of the WIM to @target.
 *
 * Visible steps, in order: the apply_args structure is filled in; for
 * WIMLIB_EXTRACT_FLAG_NTFS the target volume is mounted with ntfs_mount() and
 * the NTFS callbacks are selected, otherwise the normal callbacks are used;
 * the image is selected with select_wim_image(); the list of streams to
 * extract is built and the byte totals are computed; the stream list is
 * optionally sorted for sequential reading; the directory structure and
 * empty files are created; the non-empty files are extracted; timestamps are
 * applied; and finally an NTFS volume is unmounted again.  The progress
 * function is called at the stages named by the WIMLIB_PROGRESS_MSG_*
 * constants below.
 *
 * WIMLIB_ERR_NTFS_3G is used when mounting or unmounting the volume fails.
 *
 * NOTE(review): many lines (conditions, error checks, braces, the final
 * return) are not visible in this listing.
 */
568 /* Extracts the image @image from the WIM @w to the directory or NTFS volume
570 static int extract_single_image(WIMStruct *w, int image,
571 const char *target, int extract_flags,
572 wimlib_progress_func_t progress_func)
575 struct list_head stream_list;
576 struct hlist_head *inode_list;
578 struct apply_args args;
579 const struct apply_operations *ops;
582 args.target = target;
583 args.extract_flags = extract_flags;
584 args.num_lutimes_warnings = 0;
585 args.stream_list = &stream_list;
586 args.progress_func = progress_func;
589 args.progress.extract.wimfile_name = w->filename;
590 args.progress.extract.image = image;
591 args.progress.extract.extract_flags = (extract_flags &
592 WIMLIB_EXTRACT_MASK_PUBLIC);
593 args.progress.extract.image_name = wimlib_get_image_name(w, image);
594 args.progress.extract.target = target;
598 if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
599 args.vol = ntfs_mount(target, 0);
601 ERROR_WITH_ERRNO("Failed to mount NTFS volume `%s'", target);
602 return WIMLIB_ERR_NTFS_3G;
604 ops = &ntfs_apply_operations;
607 ops = &normal_apply_operations;
609 ret = select_wim_image(w, image);
613 inode_list = &w->image_metadata[image - 1].inode_list;
615 /* Build a list of the streams that need to be extracted */
616 find_streams_for_extraction(inode_list, &stream_list,
617 w->lookup_table, extract_flags);
619 /* Calculate the number of bytes of data that will be extracted */
620 calculate_bytes_to_extract(&stream_list, extract_flags,
624 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN,
628 /* If a sequential extraction was specified, sort the streams to be
629 * extracted by their position in the WIM file, so that the WIM file can
630 * be read sequentially. */
631 if (extract_flags & WIMLIB_EXTRACT_FLAG_SEQUENTIAL) {
632 ret = sort_stream_list_by_wim_position(&stream_list);
/* A failed sort is not fatal: extraction continues in list order. */
634 WARNING("Falling back to non-sequential extraction");
635 extract_flags &= ~WIMLIB_EXTRACT_FLAG_SEQUENTIAL;
640 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_BEGIN,
644 /* Make the directory structure and extract empty files */
/* WIMLIB_EXTRACT_FLAG_NO_STREAMS is set only for the duration of this tree
 * walk and cleared again right after it. */
645 args.extract_flags |= WIMLIB_EXTRACT_FLAG_NO_STREAMS;
646 args.apply_dentry = ops->apply_dentry;
647 ret = for_dentry_in_tree(wim_root_dentry(w), maybe_apply_dentry, &args);
648 args.extract_flags &= ~WIMLIB_EXTRACT_FLAG_NO_STREAMS;
653 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_END,
657 /* Extract non-empty files */
658 ret = apply_stream_list(&stream_list, &args, ops, progress_func);
663 progress_func(WIMLIB_PROGRESS_MSG_APPLY_TIMESTAMPS,
667 /* Apply timestamps */
668 ret = for_dentry_in_tree_depth(wim_root_dentry(w),
669 ops->apply_dentry_timestamps, &args);
674 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END,
679 /* Unmount the NTFS volume */
680 if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
/* An unmount failure is reported through ret. */
681 if (ntfs_umount(args.vol, FALSE) != 0) {
682 ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%s'", args.target);
684 ret = WIMLIB_ERR_NTFS_3G;
692 /* Extracts all images from the WIM to the directory @target, with the images
693 * placed in subdirectories named by their image names. */
/*
 * The target directory itself is created (or accepted if it already exists)
 * with extract_directory().  For each image, from 1 to w->hdr.image_count,
 * the output path is @target, a slash, and the image name; an image whose
 * name is missing or empty uses its decimal image number instead.  Each
 * image is then extracted with extract_single_image().
 *
 * NOTE(review): one parameter line, the error checks after the calls and the
 * final return are not visible in this listing.
 */
694 static int extract_all_images(WIMStruct *w, const char *target,
696 wimlib_progress_func_t progress_func)
/* The name part of the buffer is at least 20 bytes, presumably so that a
 * decimal image number always fits; confirm. */
698 size_t image_name_max_len = max(xml_get_max_image_name_len(w), 20);
699 size_t output_path_len = strlen(target);
700 char buf[output_path_len + 1 + image_name_max_len + 1];
703 const char *image_name;
705 ret = extract_directory(target, true);
/* The common prefix, target plus a slash, is written once; only the part
 * after it changes per image. */
709 memcpy(buf, target, output_path_len);
710 buf[output_path_len] = '/';
711 for (image = 1; image <= w->hdr.image_count; image++) {
712 image_name = wimlib_get_image_name(w, image);
713 if (image_name && *image_name) {
714 strcpy(buf + output_path_len + 1, image_name);
716 /* Image name is empty. Use image number instead */
717 sprintf(buf + output_path_len + 1, "%d", image);
719 ret = extract_single_image(w, image, buf, extract_flags,
/*
 * Public entry point for extraction.
 *
 * Visible behavior, in order:
 *  - an initial parameter check can return WIMLIB_ERR_INVALID_PARAM (its
 *    condition is not visible in this listing);
 *  - extract_flags is restricted to WIMLIB_EXTRACT_MASK_PUBLIC;
 *  - specifying both WIMLIB_EXTRACT_FLAG_SYMLINK and
 *    WIMLIB_EXTRACT_FLAG_HARDLINK is rejected with WIMLIB_ERR_INVALID_PARAM;
 *  - with WIMLIB_EXTRACT_FLAG_NTFS, the symlink and hardlink flags and
 *    WIMLIB_ALL_IMAGES are rejected with WIMLIB_ERR_INVALID_PARAM, and
 *    WIMLIB_ERR_UNSUPPORTED is returned with a message saying the library
 *    was built without NTFS-3g support (NOTE(review): presumably these are
 *    alternative preprocessor branches that are not visible here; confirm);
 *  - the set of split WIM parts is checked with verify_swm_set();
 *  - when additional parts are given, a joined lookup table temporarily
 *    replaces w->lookup_table; it is freed and the saved table restored at
 *    the end;
 *  - WIMLIB_ALL_IMAGES is extracted by extract_all_images() with
 *    WIMLIB_EXTRACT_FLAG_MULTI_IMAGE set, any other image by
 *    extract_single_image() with that flag cleared;
 *  - in symlink or hardlink mode, lte_free_extracted_file is applied to
 *    every lookup table entry afterwards.
 *
 * NOTE(review): several parameter lines, conditions, braces and the final
 * return are not visible in this listing.
 */
727 /* Extracts a single image or all images from a WIM file to a directory or NTFS
729 WIMLIBAPI int wimlib_extract_image(WIMStruct *w,
733 WIMStruct **additional_swms,
734 unsigned num_additional_swms,
735 wimlib_progress_func_t progress_func)
737 struct wim_lookup_table *joined_tab, *w_tab_save;
741 return WIMLIB_ERR_INVALID_PARAM;
743 extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC;
745 if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
746 == (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
747 return WIMLIB_ERR_INVALID_PARAM;
749 if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
751 if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))) {
752 ERROR("Cannot specify symlink or hardlink flags when applying\n"
753 " directly to a NTFS volume");
754 return WIMLIB_ERR_INVALID_PARAM;
756 if (image == WIMLIB_ALL_IMAGES) {
757 ERROR("Can only apply a single image when applying "
758 "directly to a NTFS volume");
759 return WIMLIB_ERR_INVALID_PARAM;
762 ERROR("wimlib was compiled without support for NTFS-3g, so");
763 ERROR("we cannot apply a WIM image directly to a NTFS volume");
764 return WIMLIB_ERR_UNSUPPORTED;
768 ret = verify_swm_set(w, additional_swms, num_additional_swms);
772 if (num_additional_swms) {
773 ret = new_joined_lookup_table(w, additional_swms,
774 num_additional_swms, &joined_tab);
777 w_tab_save = w->lookup_table;
778 w->lookup_table = joined_tab;
781 if (image == WIMLIB_ALL_IMAGES) {
782 extract_flags |= WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
783 ret = extract_all_images(w, target, extract_flags,
786 extract_flags &= ~WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
787 ret = extract_single_image(w, image, target, extract_flags,
791 if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
792 WIMLIB_EXTRACT_FLAG_HARDLINK))
794 for_lookup_table_entry(w->lookup_table,
795 lte_free_extracted_file,
799 if (num_additional_swms) {
800 free_lookup_table(w->lookup_table);
801 w->lookup_table = w_tab_save;