4 * Support for extracting WIM files.
6 * This code does NOT contain any filesystem-specific features. In particular,
7 * security information (i.e. file permissions) and alternate data streams are
8 * ignored, except possibly to read an alternate data stream that contains
13 * Copyright (C) 2012 Eric Biggers
15 * This file is part of wimlib, a library for working with WIM files.
17 * wimlib is free software; you can redistribute it and/or modify it under the
18 * terms of the GNU General Public License as published by the Free
19 * Software Foundation; either version 3 of the License, or (at your option)
22 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
23 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
24 * A PARTICULAR PURPOSE. See the GNU General Public License for more
27 * You should have received a copy of the GNU General Public License
28 * along with wimlib; if not, see http://www.gnu.org/licenses/.
49 #include "lookup_table.h"
50 #include "timestamp.h"
51 #include "wimlib_internal.h"
55 #include <ntfs-3g/volume.h>
/*
 * Creates @output_path as a link to lte->extracted_file, the path at which
 * the same stream was extracted earlier, instead of writing the data again.
 *
 * With WIMLIB_EXTRACT_FLAG_HARDLINK set in args->extract_flags, link() makes
 * a hard link.  The remaining code prepares a path in the local buffer buf
 * and passes it to symlink().
 *
 * Returns WIMLIB_ERR_LINK if link() or symlink() fails.
 *
 * NOTE(review): several lines of this function are not visible in this view
 * (the embedded numbering skips), including the success return and the code
 * that fills in buf; confirm the details against the complete file.
 */
58 static int extract_regular_file_linked(struct dentry *dentry,
59 const char *output_path,
60 struct apply_args *args,
61 struct lookup_table_entry *lte)
63 /* This mode overrides the normal hard-link extraction and
64 * instead either symlinks or hardlinks *all* identical files in
65 * the WIM, even if they are in a different image (in the case
66 * of a multi-image extraction) */
68 if (args->extract_flags & WIMLIB_EXTRACT_FLAG_HARDLINK) {
69 if (link(lte->extracted_file, output_path) != 0) {
70 ERROR_WITH_ERRNO("Failed to hard link "
72 output_path, lte->extracted_file);
73 return WIMLIB_ERR_LINK;
76 int num_path_components;
77 int num_output_dir_path_components;
78 size_t extracted_file_len;
/* Component count of the dentry path, less one (the assignment target is on
 * a line not visible here; presumably num_path_components). */
84 get_num_path_components(dentry->full_path_utf8) - 1;
85 num_output_dir_path_components =
86 get_num_path_components(args->target);
/* Multi-image mode shifts both counts by one; presumably this accounts for
 * the per-image subdirectory under the target -- TODO confirm. */
88 if (args->extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE) {
89 num_path_components++;
90 num_output_dir_path_components--;
92 extracted_file_len = strlen(lte->extracted_file);
/* Variable-length buffer: the extracted file path, three bytes per path
 * component, and a terminator. */
94 char buf[extracted_file_len + 3 * num_path_components + 1];
97 for (i = 0; i < num_path_components; i++) {
102 p2 = lte->extracted_file;
/* Advance p2 past num_output_dir_path_components leading components of the
 * extracted file path. */
105 while (num_output_dir_path_components--)
106 p2 = path_next_part(p2, NULL);
108 if (symlink(buf, output_path) != 0) {
109 ERROR_WITH_ERRNO("Failed to symlink `%s' to "
111 buf, lte->extracted_file);
112 return WIMLIB_ERR_LINK;
/*
 * Extracts a regular file to @output_path, either by writing the stream @lte
 * to a newly opened file or, for an inode with more than one link whose
 * inode->extracted_file is already set, by hard linking to that path.
 *
 * Returns WIMLIB_ERR_LINK if link() fails, WIMLIB_ERR_NOMEM if the output
 * path cannot be duplicated, WIMLIB_ERR_OPEN if the output file cannot be
 * opened, and WIMLIB_ERR_WRITE if close() fails.
 *
 * NOTE(review): some lines of this function, including its final return, are
 * not visible in this view.
 */
118 static int extract_regular_file_unlinked(struct dentry *dentry,
119 struct apply_args *args,
120 const char *output_path,
121 struct lookup_table_entry *lte)
123 /* Normal mode of extraction. Regular files and hard links are
124 * extracted in the way that they appear in the WIM. */
128 struct inode *inode = dentry->d_inode;
/* This condition is false only when multi-image extraction is combined with
 * the symlink or hardlink flag; presumably it guards the hard link group
 * handling that follows. */
130 if (!((args->extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE)
131 && (args->extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
132 WIMLIB_EXTRACT_FLAG_HARDLINK))))
134 /* If the dentry is one of a hard link set of at least 2
135 * dentries and one of the other dentries has already been
136 * extracted, make a hard link to the file corresponding to this
137 * already-extracted directory. Otherwise, extract the file,
138 * and set the inode->extracted_file field so that other
139 * dentries in the hard link group can link to it. */
140 if (inode->link_count > 1) {
141 if (inode->extracted_file) {
142 DEBUG("Extracting hard link `%s' => `%s'",
143 output_path, inode->extracted_file);
144 if (link(inode->extracted_file, output_path) != 0) {
145 ERROR_WITH_ERRNO("Failed to hard link "
148 inode->extracted_file);
149 return WIMLIB_ERR_LINK;
/* Record a private copy of this output path on the inode. */
153 FREE(inode->extracted_file);
154 inode->extracted_file = STRDUP(output_path);
155 if (!inode->extracted_file) {
156 ERROR("Failed to allocate memory for filename");
157 return WIMLIB_ERR_NOMEM;
162 /* Extract the contents of the file to @output_path. */
/* The file is created if needed (mode 0644) and truncated if it exists. */
164 out_fd = open(output_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
166 ERROR_WITH_ERRNO("Failed to open the file `%s' for writing",
168 return WIMLIB_ERR_OPEN;
172 /* Empty file with no lookup table entry */
173 DEBUG("Empty file `%s'.", output_path);
178 ret = extract_full_wim_resource_to_fd(lte, out_fd);
180 ERROR("Failed to extract resource to `%s'", output_path);
/* Add the size of the stream to the completed byte count. */
183 args->progress.extract.completed_bytes += wim_resource_size(lte);
/* A failed close() is reported as a write error. */
185 if (close(out_fd) != 0) {
186 ERROR_WITH_ERRNO("Failed to close file `%s'", output_path);
187 ret = WIMLIB_ERR_WRITE;
193 * Extracts a regular file from the WIM archive.
/*
 * Chooses how a regular file is extracted to @output_path.
 *
 * When the inode has an unnamed stream and the symlink or hardlink flag is
 * set in args->extract_flags: if lte->extracted_file is already set, the file
 * is created by extract_regular_file_linked(); otherwise a copy of
 * @output_path is stored in lte->extracted_file (WIMLIB_ERR_NOMEM if STRDUP
 * fails).  In the remaining cases the file is extracted by
 * extract_regular_file_unlinked().
 */
195 static int extract_regular_file(struct dentry *dentry,
196 struct apply_args *args,
197 const char *output_path)
199 struct lookup_table_entry *lte;
200 const struct inode *inode = dentry->d_inode;
202 lte = inode_unnamed_lte_resolved(inode);
204 if (lte && (args->extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
205 WIMLIB_EXTRACT_FLAG_HARDLINK)))
/* Stream already extracted somewhere: link to that copy. */
207 if (lte->extracted_file) {
208 return extract_regular_file_linked(dentry, output_path, args, lte);
/* First extraction of this stream: record where it is going. */
210 lte->extracted_file = STRDUP(output_path);
211 if (!lte->extracted_file)
212 return WIMLIB_ERR_NOMEM;
215 return extract_regular_file_unlinked(dentry, args, output_path, lte);
/*
 * Extracts a symbolic link: the link target is read with inode_readlink()
 * and the link is created at @output_path with symlink().
 *
 * Returns WIMLIB_ERR_INVALID_DENTRY if the target cannot be read and
 * WIMLIB_ERR_LINK if symlink() fails.
 *
 * NOTE(review): the declaration of target, the failure checks and the
 * success return are on lines not visible in this view.
 */
218 static int extract_symlink(struct dentry *dentry,
219 struct apply_args *args,
220 const char *output_path)
223 ssize_t ret = inode_readlink(dentry->d_inode, target,
224 sizeof(target), args->w, 0);
225 struct lookup_table_entry *lte;
228 ERROR("Could not read the symbolic link from dentry `%s'",
229 dentry->full_path_utf8);
230 return WIMLIB_ERR_INVALID_DENTRY;
232 ret = symlink(target, output_path);
234 ERROR_WITH_ERRNO("Failed to symlink `%s' to `%s'",
235 output_path, target);
236 return WIMLIB_ERR_LINK;
/* Add the size of the unnamed stream of the inode to the completed byte
 * count. */
238 lte = inode_unnamed_lte_resolved(dentry->d_inode);
239 args->progress.extract.completed_bytes += wim_resource_size(lte);
244 * Extracts a directory from the WIM archive.
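246 * @output_path: The path at which the directory is to be created.
247 * @is_root: Passed as true by extract_all_images() for the top-level target directory, and as false by apply_dentry_normal().
248 * @return: WIMLIB_ERR_MKDIR or WIMLIB_ERR_STAT on failure; presumably 0 on success.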
/*
 * Makes sure a directory exists at @output_path.
 *
 * stat() is tried first.  If the path exists but is not a directory,
 * WIMLIB_ERR_MKDIR is returned; if stat() fails for a reason other than
 * ENOENT, WIMLIB_ERR_STAT is returned.  When the path does not exist, the
 * directory is created with mkdir() using mode 0755, and WIMLIB_ERR_MKDIR is
 * returned if that fails.
 *
 * NOTE(review): the handling of an already existing directory, the use of
 * @is_root and the success return are on lines not visible in this view.
 */
250 static int extract_directory(const char *output_path, bool is_root)
254 ret = stat(output_path, &stbuf);
256 if (S_ISDIR(stbuf.st_mode)) {
258 /*WARNING("`%s' already exists", output_path);*/
261 ERROR("`%s' is not a directory", output_path);
262 return WIMLIB_ERR_MKDIR;
/* Only a missing path (ENOENT) is an acceptable stat() failure. */
265 if (errno != ENOENT) {
266 ERROR_WITH_ERRNO("Failed to stat `%s'", output_path);
267 return WIMLIB_ERR_STAT;
270 /* The path does not exist yet: create the directory. */
271 if (mkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP |
272 S_IROTH | S_IXOTH) != 0) {
273 ERROR_WITH_ERRNO("Cannot create directory `%s'",
275 return WIMLIB_ERR_MKDIR;
281 * Extracts a file, directory, or symbolic link from the WIM archive. For use
282 * in for_dentry_in_tree().
/*
 * Extracts one dentry beneath args->target.  @arg is the struct apply_args
 * of the running extraction; the signature matches the apply_dentry member
 * of struct apply_operations.
 *
 * The output path is args->target followed by the full path of the dentry.
 * Depending on the inode, the dentry is extracted as a symbolic link, a
 * directory or a regular file, and is_extracted is then set on the dentry.
 *
 * NOTE(review): the bodies of the early checks and the final return are on
 * lines not visible in this view.
 */
284 static int apply_dentry_normal(struct dentry *dentry, void *arg)
286 struct apply_args *args = arg;
287 int extract_flags = args->extract_flags;
288 struct inode *inode = dentry->d_inode;
/* A dentry already marked as extracted gets special handling (presumably an
 * early return). */
292 if (dentry->is_extracted)
/* With NO_STREAMS set, inodes that have an unnamed stream get special
 * handling (presumably they are left for the later stream pass). */
295 if (extract_flags & WIMLIB_EXTRACT_FLAG_NO_STREAMS)
296 if (inode_unnamed_lte_resolved(inode))
/* In verbose mode (plus a further condition not visible here), report the
 * current path through the progress callback. */
299 if ((extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) &&
302 args->progress.extract.cur_path = dentry->full_path_utf8;
303 args->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY,
/* Build the output path in a variable-length array: the target, then the
 * full path of the dentry, then a NUL terminator. */
307 len = strlen(args->target);
308 char output_path[len + dentry->full_path_utf8_len + 1];
309 memcpy(output_path, args->target, len);
310 memcpy(output_path + len, dentry->full_path_utf8, dentry->full_path_utf8_len);
311 output_path[len + dentry->full_path_utf8_len] = '\0';
/* Dispatch on the kind of inode. */
313 if (inode_is_symlink(inode))
314 ret = extract_symlink(dentry, args, output_path);
315 else if (inode_is_directory(inode))
316 ret = extract_directory(output_path, false);
318 ret = extract_regular_file(dentry, args, output_path);
320 dentry->is_extracted = 1;
324 /* Apply timestamp to extracted file */
/*
 * Sets the access and modification times of the path extracted for @dentry
 * from last_access_time and last_write_time of its inode.  @arg is the
 * struct apply_args of the running extraction.
 *
 * lutimes() is tried first.  If errno is ENOSYS afterwards, utime() is tried
 * with the same two times converted by wim_timestamp_to_unix().  When errno
 * is not ENOSYS, or fewer than 10 warnings have been counted so far,
 * args->num_lutimes_warnings is incremented; the warning message itself is
 * commented out.
 *
 * NOTE(review): the check of the lutimes() result and the return statements
 * are on lines not visible in this view.
 */
325 static int apply_dentry_timestamps_normal(struct dentry *dentry, void *arg)
327 struct apply_args *args = arg;
328 size_t len = strlen(args->target);
329 char output_path[len + dentry->full_path_utf8_len + 1];
330 const struct inode *inode = dentry->d_inode;
/* Output path: the target followed by the full path of the dentry. */
333 memcpy(output_path, args->target, len);
334 memcpy(output_path + len, dentry->full_path_utf8, dentry->full_path_utf8_len);
335 output_path[len + dentry->full_path_utf8_len] = '\0';
/* tv[0] is the access time and tv[1] the modification time. */
337 struct timeval tv[2];
338 wim_timestamp_to_timeval(inode->last_access_time, &tv[0]);
339 wim_timestamp_to_timeval(inode->last_write_time, &tv[1]);
341 ret = lutimes(output_path, tv);
/* Fall back to utime() when lutimes() is not implemented. */
348 if (errno == ENOSYS) {
350 buf.actime = wim_timestamp_to_unix(inode->last_access_time);
351 buf.modtime = wim_timestamp_to_unix(inode->last_write_time);
352 if (utime(output_path, &buf) == 0)
356 if (errno != ENOSYS || args->num_lutimes_warnings < 10) {
357 /*WARNING("Failed to set timestamp on file `%s': %s",*/
358 /*output_path, strerror(errno));*/
359 args->num_lutimes_warnings++;
/*
 * Comparison callback passed to qsort() by sort_stream_list_by_wim_position().
 * @p1 and @p2 each point to an array element holding a pointer to a
 * struct lookup_table_entry; the entries are compared by
 * resource_entry.offset.
 *
 * NOTE(review): the return statements are on lines not visible in this view;
 * presumably negative, positive and zero for less, greater and equal --
 * confirm against the complete file.
 */
365 static int cmp_streams_by_wim_position(const void *p1, const void *p2)
367 const struct lookup_table_entry *lte1, *lte2;
368 lte1 = *(const struct lookup_table_entry**)p1;
369 lte2 = *(const struct lookup_table_entry**)p2;
370 if (lte1->resource_entry.offset < lte2->resource_entry.offset)
372 else if (lte1->resource_entry.offset > lte2->resource_entry.offset)
/*
 * Reorders @stream_list according to cmp_streams_by_wim_position().
 *
 * The list is walked once to size a temporary array of entry pointers, the
 * array is filled from the list and sorted with qsort(), and the list is
 * then rebuilt from the sorted array.
 *
 * Returns WIMLIB_ERR_NOMEM if the temporary array cannot be allocated.
 *
 * NOTE(review): the counting statement, the release of the array and the
 * success return are on lines not visible in this view.
 */
378 static int sort_stream_list_by_wim_position(struct list_head *stream_list)
380 struct list_head *cur;
382 struct lookup_table_entry **array;
/* First pass over the list (the loop body is not visible here; num_streams
 * is used as the element count below). */
387 list_for_each(cur, stream_list)
389 array_size = num_streams * sizeof(array[0]);
390 array = MALLOC(array_size);
392 ERROR("Failed to allocate %zu bytes to sort stream entries",
394 return WIMLIB_ERR_NOMEM;
/* Collect the entries; staging_list is the list linkage embedded in each
 * lookup table entry. */
396 cur = stream_list->next;
397 for (i = 0; i < num_streams; i++) {
398 array[i] = container_of(cur, struct lookup_table_entry, staging_list);
402 qsort(array, num_streams, sizeof(array[0]), cmp_streams_by_wim_position);
/* Empty the list and append the entries again in sorted order. */
404 INIT_LIST_HEAD(stream_list);
405 for (i = 0; i < num_streams; i++)
406 list_add_tail(&array[i]->staging_list, stream_list);
/*
 * Computes the stream count and byte total for the streams on @stream_list
 * and stores them in progress->extract.num_streams and
 * progress->extract.total_bytes; completed_bytes is reset to 0.
 *
 * In symlink or hardlink mode the size of a stream is added once, and only
 * if lte->extracted_file is not set.  Otherwise each stream contributes
 * out_refcnt to the stream count and out_refcnt times its size to the byte
 * total.
 *
 * NOTE(review): the extract_flags parameter and the declarations of the two
 * accumulators are on lines not visible in this view.
 */
411 static void calculate_bytes_to_extract(struct list_head *stream_list,
413 union wimlib_progress_info *progress)
415 struct lookup_table_entry *lte;
419 /* For each stream to be extracted... */
420 list_for_each_entry(lte, stream_list, staging_list) {
422 (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
424 /* In the symlink or hard link extraction mode, each
425 * stream will be extracted one time regardless of how
426 * many dentries share the stream. */
427 wimlib_assert(!(extract_flags & WIMLIB_EXTRACT_FLAG_NTFS));
/* A stream that already has an extracted copy is not counted again. */
428 if (!lte->extracted_file) {
430 total_bytes += wim_resource_size(lte);
/* Normal mode: one extraction per reference to the stream. */
433 num_streams += lte->out_refcnt;
434 total_bytes += lte->out_refcnt * wim_resource_size(lte);
437 progress->extract.num_streams = num_streams;
438 progress->extract.total_bytes = total_bytes;
439 progress->extract.completed_bytes = 0;
442 static void maybe_add_stream_for_extraction(struct lookup_table_entry *lte,
443 struct list_head *stream_list)
445 if (++lte->out_refcnt == 1) {
446 INIT_LIST_HEAD(<e->inode_list);
447 list_add_tail(<e->staging_list, stream_list);
/*
 * Registers the streams of @inode for extraction on @stream_list.
 *
 * The unnamed stream is passed to maybe_add_stream_for_extraction() and the
 * inode is appended to the inode_list of that stream.  With
 * WIMLIB_EXTRACT_FLAG_NTFS, the streams of the ads_entries whose
 * stream_name_len is nonzero (presumably named alternate data streams) are
 * registered as well.
 *
 * NOTE(review): the text <e-> that appears below is presumably a mangled
 * &lte-> (HTML-entity damage) and does not compile as written; confirm
 * against the upstream file.  The extract_flags parameter, the conditions
 * guarding the calls and the use of inode_added are on lines not visible in
 * this view.
 */
451 static void inode_find_streams_for_extraction(struct inode *inode,
452 struct list_head *stream_list,
455 struct lookup_table_entry *lte;
456 bool inode_added = false;
458 lte = inode_unnamed_lte_resolved(inode);
461 maybe_add_stream_for_extraction(lte, stream_list);
462 list_add_tail(&inode->lte_inode_list, <e->inode_list);
466 if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
467 for (unsigned i = 0; i < inode->num_ads; i++) {
/* Entries with an empty stream name are skipped here. */
468 if (inode->ads_entries[i].stream_name_len != 0) {
469 lte = inode->ads_entries[i].lte;
471 maybe_add_stream_for_extraction(lte,
474 list_add_tail(&inode->lte_inode_list,
/*
 * Builds @stream_list from the inodes on @inode_list.
 *
 * lte_zero_out_refcnt is first applied to every entry of @lookup_table
 * (presumably resetting each out_refcnt to zero) and @stream_list is
 * initialized to empty.  Then, for each inode: its streams are resolved
 * against @lookup_table if not yet resolved, is_extracted is cleared on
 * every dentry of the inode, and its streams are registered with
 * inode_find_streams_for_extraction().
 *
 * NOTE(review): the extract_flags parameter and the declaration of inode are
 * on lines not visible in this view.
 */
485 static void find_streams_for_extraction(struct hlist_head *inode_list,
486 struct list_head *stream_list,
487 struct lookup_table *lookup_table,
491 struct hlist_node *cur;
492 struct dentry *dentry;
494 for_lookup_table_entry(lookup_table, lte_zero_out_refcnt, NULL);
495 INIT_LIST_HEAD(stream_list);
496 hlist_for_each_entry(inode, cur, inode_list, hlist) {
497 if (!inode->resolved)
498 inode_resolve_ltes(inode, lookup_table);
/* Start with every dentry of the inode marked as not yet extracted. */
499 inode_for_each_dentry(dentry, inode)
500 dentry->is_extracted = 0;
501 inode_find_streams_for_extraction(inode, stream_list,
/*
 * Pair of callbacks used to apply an image.  Each takes a dentry and an
 * opaque argument; the callers in this file pass a struct apply_args pointer.
 */
506 struct apply_operations {
/* Extracts a single dentry. */
507 int (*apply_dentry)(struct dentry *dentry, void *arg);
/* Applies timestamps to a dentry; run over the tree after extraction. */
508 int (*apply_dentry_timestamps)(struct dentry *dentry, void *arg);
/*
 * Callbacks implemented by apply_dentry_normal() and
 * apply_dentry_timestamps_normal(); assigned to ops in
 * extract_single_image().
 */
511 static const struct apply_operations normal_apply_operations = {
512 .apply_dentry = apply_dentry_normal,
513 .apply_dentry_timestamps = apply_dentry_timestamps_normal,
/*
 * Callbacks assigned to ops in extract_single_image() when
 * WIMLIB_EXTRACT_FLAG_NTFS is set.  apply_dentry_ntfs and
 * apply_dentry_timestamps_ntfs are not defined in the lines visible here.
 */
517 static const struct apply_operations ntfs_apply_operations = {
518 .apply_dentry = apply_dentry_ntfs,
519 .apply_dentry_timestamps = apply_dentry_timestamps_ntfs,
/*
 * Applies every dentry that refers to a stream on @stream_list, one stream
 * at a time.
 *
 * For each stream, each inode on the inode_list of the stream, and each
 * dentry of that inode, ops->apply_dentry() is called with @args.
 * @progress_func is called with WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS each
 * time completed_bytes reaches the next threshold; thresholds are spaced one
 * hundredth of total_bytes apart.
 *
 * NOTE(review): the text <e-> that appears below is presumably a mangled
 * &lte-> (HTML-entity damage) and does not compile as written; confirm
 * against the upstream file.  The error check after apply_dentry() and the
 * return statements are on lines not visible in this view.
 */
523 static int apply_stream_list(struct list_head *stream_list,
524 struct apply_args *args,
525 const struct apply_operations *ops,
526 wimlib_progress_func_t progress_func)
/* Progress is reported roughly once per percent of the total. */
528 uint64_t bytes_per_progress = args->progress.extract.total_bytes / 100;
529 uint64_t next_progress = bytes_per_progress;
530 struct lookup_table_entry *lte;
532 struct dentry *dentry;
535 /* This complicated loop is actually just looping through the dentries
536 * (as for_dentry_in_tree() does), but the outer loop is actually over
537 * the distinct streams to be extracted so that sequential reading of
538 * the WIM can be implemented. */
540 /* For each distinct stream to be extracted */
541 list_for_each_entry(lte, stream_list, staging_list) {
542 /* For each inode that contains the stream */
543 list_for_each_entry(inode, <e->inode_list, lte_inode_list) {
544 /* For each dentry that points to the inode */
545 inode_for_each_dentry(dentry, inode) {
546 ret = ops->apply_dentry(dentry, args);
/* No progress message is sent when total_bytes is 0. */
550 args->progress.extract.completed_bytes >= next_progress &&
551 args->progress.extract.total_bytes != 0)
553 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS,
555 next_progress += bytes_per_progress;
/*
 * Extracts image number @image of @w to @target.
 *
 * Steps visible here, in order:
 *   - fill in a struct apply_args, including its progress information;
 *   - with WIMLIB_EXTRACT_FLAG_NTFS, mount @target with ntfs_mount() and use
 *     ntfs_apply_operations; normal_apply_operations is used as well
 *     (presumably in the else branch);
 *   - select the image, build the stream list and compute the byte totals;
 *   - with WIMLIB_EXTRACT_FLAG_SEQUENTIAL, sort the stream list by position
 *     in the WIM;
 *   - apply the dentry tree with WIMLIB_EXTRACT_FLAG_NO_STREAMS set;
 *   - apply the stream list, then apply timestamps over the tree;
 *   - for NTFS, unmount the volume.
 * @progress_func is called around these phases.
 *
 * Returns WIMLIB_ERR_NTFS_3G if mounting or unmounting the NTFS volume fails.
 *
 * NOTE(review): the error checks between the steps, any guards on
 * @progress_func and the final return are on lines not visible in this view.
 */
564 static int extract_single_image(WIMStruct *w, int image,
565 const char *target, int extract_flags,
566 wimlib_progress_func_t progress_func)
569 struct list_head stream_list;
570 struct hlist_head *inode_list;
572 struct apply_args args;
573 const struct apply_operations *ops;
576 args.target = target;
577 args.extract_flags = extract_flags;
578 args.num_lutimes_warnings = 0;
/* NOTE(review): args.target was already assigned above; this second
 * assignment is redundant. */
579 args.target = target;
580 args.stream_list = &stream_list;
581 args.progress_func = progress_func;
/* Only the public subset of the flags is exposed to the progress callback. */
584 args.progress.extract.wimfile_name = w->filename;
585 args.progress.extract.image = image;
586 args.progress.extract.extract_flags = (extract_flags &
587 WIMLIB_EXTRACT_MASK_PUBLIC);
588 args.progress.extract.image_name = wimlib_get_image_name(w, image);
589 args.progress.extract.target = target;
593 if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
594 args.vol = ntfs_mount(target, 0);
596 ERROR_WITH_ERRNO("Failed to mount NTFS volume `%s'", target);
597 return WIMLIB_ERR_NTFS_3G;
599 ops = &ntfs_apply_operations;
602 ops = &normal_apply_operations;
604 ret = select_wim_image(w, image);
/* image is 1-based; image_metadata is indexed from 0. */
608 inode_list = &w->image_metadata[image - 1].inode_list;
610 find_streams_for_extraction(inode_list, &stream_list,
611 w->lookup_table, extract_flags);
613 calculate_bytes_to_extract(&stream_list, extract_flags,
617 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN,
621 if (extract_flags & WIMLIB_EXTRACT_FLAG_SEQUENTIAL) {
622 ret = sort_stream_list_by_wim_position(&stream_list);
/* A failed sort is not fatal: extraction continues with the unsorted list. */
624 WARNING("Falling back to non-sequential extraction");
625 extract_flags &= ~WIMLIB_EXTRACT_FLAG_SEQUENTIAL;
630 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_BEGIN,
/* Walk the tree with NO_STREAMS set, then clear the flag again before the
 * stream pass. */
634 args.extract_flags |= WIMLIB_EXTRACT_FLAG_NO_STREAMS;
635 ret = for_dentry_in_tree(wim_root_dentry(w), ops->apply_dentry, &args);
636 args.extract_flags &= ~WIMLIB_EXTRACT_FLAG_NO_STREAMS;
641 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_END,
645 ret = apply_stream_list(&stream_list, &args, ops, progress_func);
650 progress_func(WIMLIB_PROGRESS_MSG_APPLY_TIMESTAMPS, NULL);
652 ret = for_dentry_in_tree_depth(wim_root_dentry(w),
653 ops->apply_dentry_timestamps, &args);
658 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END,
/* The NTFS volume mounted above is unmounted here; an unmount failure
 * replaces the return value. */
663 if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
664 if (ntfs_umount(args.vol, FALSE) != 0) {
665 ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%s'", args.target);
667 ret = WIMLIB_ERR_NTFS_3G;
675 /* Extracts all images from the WIM to @target, with the images placed in
676 * subdirectories named by their image names. */
/*
 * Extracts every image of @w into its own subdirectory of @target.
 *
 * @target is first created with extract_directory().  Each image from 1 to
 * w->hdr.image_count is then extracted with extract_single_image() to the
 * path formed from @target, a slash and the image name, or the decimal image
 * number when the image has no name or an empty one.
 *
 * NOTE(review): the extract_flags parameter, the error checks and the final
 * return are on lines not visible in this view.
 */
677 static int extract_all_images(WIMStruct *w, const char *target,
679 wimlib_progress_func_t progress_func)
/* buf holds the target, a slash, the longest image name and a terminator.
 * At least 20 bytes are reserved for the name part, presumably so that a
 * decimal image number always fits. */
681 size_t image_name_max_len = max(xml_get_max_image_name_len(w), 20);
682 size_t output_path_len = strlen(target);
683 char buf[output_path_len + 1 + image_name_max_len + 1];
686 const char *image_name;
688 ret = extract_directory(target, true);
692 memcpy(buf, target, output_path_len);
693 buf[output_path_len] = '/';
694 for (image = 1; image <= w->hdr.image_count; image++) {
695 image_name = wimlib_get_image_name(w, image);
696 if (image_name && *image_name) {
697 strcpy(buf + output_path_len + 1, image_name);
699 /* Image name is empty. Use image number instead */
700 sprintf(buf + output_path_len + 1, "%d", image);
702 ret = extract_single_image(w, image, buf, extract_flags,
710 /* Extracts a single image or all images from a WIM file. */
/*
 * Public entry point: extracts image @image of @w, or every image when
 * @image is WIMLIB_ALL_IMAGES.
 *
 * Flags outside WIMLIB_EXTRACT_MASK_PUBLIC are discarded.
 * WIMLIB_ERR_INVALID_PARAM is returned when the symlink and hardlink flags
 * are both set, or when the NTFS flag is combined with either of them or
 * with WIMLIB_ALL_IMAGES.  WIMLIB_ERR_UNSUPPORTED is returned for the NTFS
 * flag in a build without NTFS-3g support (presumably a conditionally
 * compiled branch; the preprocessor lines are not visible here).
 *
 * When @num_additional_swms is nonzero, a lookup table joined from @w and
 * @additional_swms temporarily replaces w->lookup_table; it is freed and the
 * original table restored before returning.
 *
 * NOTE(review): the target and extract_flags parameters, the first parameter
 * check, the error checks and the final return are on lines not visible in
 * this view.
 */
711 WIMLIBAPI int wimlib_extract_image(WIMStruct *w, int image,
714 WIMStruct **additional_swms,
715 unsigned num_additional_swms,
716 wimlib_progress_func_t progress_func)
718 struct lookup_table *joined_tab, *w_tab_save;
722 return WIMLIB_ERR_INVALID_PARAM;
724 extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC;
/* The symlink and hardlink modes are mutually exclusive. */
726 if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
727 == (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
728 return WIMLIB_ERR_INVALID_PARAM;
730 if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
732 if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))) {
733 ERROR("Cannot specify symlink or hardlink flags when applying\n"
734 " directly to a NTFS volume");
735 return WIMLIB_ERR_INVALID_PARAM;
737 if (image == WIMLIB_ALL_IMAGES) {
738 ERROR("Can only apply a single image when applying "
739 "directly to a NTFS volume");
740 return WIMLIB_ERR_INVALID_PARAM;
743 ERROR("wimlib was compiled without support for NTFS-3g, so");
744 ERROR("we cannot apply a WIM image directly to a NTFS volume");
745 return WIMLIB_ERR_UNSUPPORTED;
749 ret = verify_swm_set(w, additional_swms, num_additional_swms);
/* Temporarily swap in a lookup table that also covers the additional parts. */
753 if (num_additional_swms) {
754 ret = new_joined_lookup_table(w, additional_swms,
755 num_additional_swms, &joined_tab);
758 w_tab_save = w->lookup_table;
759 w->lookup_table = joined_tab;
/* MULTI_IMAGE is set only when all images are extracted. */
762 if (image == WIMLIB_ALL_IMAGES) {
763 extract_flags |= WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
764 ret = extract_all_images(w, target, extract_flags,
767 extract_flags &= ~WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
768 ret = extract_single_image(w, image, target, extract_flags,
/* In symlink or hardlink mode, apply lte_free_extracted_file to every lookup
 * table entry (presumably releasing the recorded extracted_file paths). */
772 if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
773 WIMLIB_EXTRACT_FLAG_HARDLINK))
775 for_lookup_table_entry(w->lookup_table,
776 lte_free_extracted_file,
/* Drop the joined table and restore the original one. */
780 if (num_additional_swms) {
781 free_lookup_table(w->lookup_table);
782 w->lookup_table = w_tab_save;