4 * Support for extracting WIM files.
6 * This code does NOT contain any filesystem-specific features. In particular,
7 * security information (i.e. file permissions) and alternate data streams are
8 * ignored, except possibly to read an alternate data stream that contains
13 * Copyright (C) 2010 Carl Thijssen
14 * Copyright (C) 2012 Eric Biggers
16 * This file is part of wimlib, a library for working with WIM files.
18 * wimlib is free software; you can redistribute it and/or modify it under the
19 * terms of the GNU General Public License as published by the Free
20 * Software Foundation; either version 3 of the License, or (at your option)
23 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
24 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
25 * A PARTICULAR PURPOSE. See the GNU General Public License for more
28 * You should have received a copy of the GNU General Public License
29 * along with wimlib; if not, see http://www.gnu.org/licenses/.
50 #include "lookup_table.h"
51 #include "timestamp.h"
52 #include "wimlib_internal.h"
/*
 * Makes @output_path a link to the path recorded in lte->extracted_file
 * rather than extracting the stream data again.
 *
 * With WIMLIB_EXTRACT_FLAG_HARDLINK a hard link is created with link().  In
 * the symlink case (WIMLIB_EXTRACT_FLAG_SYMLINK is asserted) the link target
 * is assembled in a stack buffer and created with symlink().
 *
 * Returns WIMLIB_ERR_LINK if link() or symlink() fails.
 * NOTE(review): the success return value is not visible in this listing --
 * confirm.
 */
56 static int extract_regular_file_linked(const struct dentry *dentry,
57 const char *output_dir,
58 const char *output_path,
60 struct lookup_table_entry *lte)
62 /* This mode overrides the normal hard-link extraction and
63 * instead either symlinks or hardlinks *all* identical files in
64 * the WIM, even if they are in a different image (in the case
65 * of a multi-image extraction) */
66 wimlib_assert(lte->extracted_file != NULL);
68 if (extract_flags & WIMLIB_EXTRACT_FLAG_HARDLINK) {
69 if (link(lte->extracted_file, output_path) != 0) {
70 ERROR_WITH_ERRNO("Failed to hard link "
72 output_path, lte->extracted_file);
73 return WIMLIB_ERR_LINK;
76 int num_path_components;
77 int num_output_dir_path_components;
78 size_t extracted_file_len;
83 wimlib_assert(extract_flags & WIMLIB_EXTRACT_FLAG_SYMLINK);
/* Component count of the dentry's path, minus one.
 * NOTE(review): the assignment target is on a line missing from this
 * listing -- presumably num_path_components; confirm. */
86 get_num_path_components(dentry->full_path_utf8) - 1;
87 num_output_dir_path_components =
88 get_num_path_components(output_dir);
/* Multi-image extraction: one more level is counted for the dentry and one
 * fewer for the output directory.
 * NOTE(review): presumably because each image lands in its own
 * subdirectory of the output directory -- confirm. */
90 if (extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE) {
91 num_path_components++;
92 num_output_dir_path_components--;
94 extracted_file_len = strlen(lte->extracted_file);
/* Variable-length stack buffer: the full target path, 3 bytes per counted
 * path component, and a terminating NUL. */
96 char buf[extracted_file_len + 3 * num_path_components + 1];
/* NOTE(review): the loop body is not visible; the 3 bytes reserved per
 * component suggest one "../" is written per iteration -- confirm. */
99 for (i = 0; i < num_path_components; i++) {
104 p2 = lte->extracted_file;
/* Advance p2 once per remaining output-directory component (assumes
 * path_next_part() returns the start of the next component -- TODO
 * confirm). */
107 while (num_output_dir_path_components--)
108 p2 = path_next_part(p2, NULL);
110 if (symlink(buf, output_path) != 0) {
111 ERROR_WITH_ERRNO("Failed to symlink `%s' to "
113 buf, lte->extracted_file);
114 return WIMLIB_ERR_LINK;
/*
 * Extracts the regular file @dentry to @output_path: the path is opened for
 * writing and the stream @lte is written to it with
 * extract_full_wim_resource_to_fd().
 *
 * Error codes visible here: WIMLIB_ERR_LINK (hard link failed),
 * WIMLIB_ERR_NOMEM (could not copy the output path), WIMLIB_ERR_OPEN (could
 * not open the output file) and WIMLIB_ERR_WRITE (close() failed).
 * NOTE(review): the final return statement is not visible in this listing.
 */
121 static int extract_regular_file_unlinked(WIMStruct *w,
122 struct dentry *dentry,
123 const char *output_path,
125 struct lookup_table_entry *lte)
127 /* Normal mode of extraction. Regular files and hard links are
128 * extracted in the way that they appear in the WIM. */
132 struct inode *inode = dentry->d_inode;
/* True unless a multi-image extraction is combined with the SYMLINK or
 * HARDLINK flag.
 * NOTE(review): presumably this guards the hard-link handling below; the
 * opening brace is missing from the listing -- confirm. */
134 if (!((extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE)
135 && (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
136 WIMLIB_EXTRACT_FLAG_HARDLINK))))
138 /* If the dentry is one of a hard link set of at least 2
139 * dentries and one of the other dentries has already been
140 * extracted, make a hard link to the file corresponding to this
141 * already-extracted dentry. Otherwise, extract the file,
142 * and set the inode->extracted_file field so that other
143 * dentries in the hard link group can link to it. */
144 if (inode->link_count > 1) {
145 if (inode->extracted_file) {
146 DEBUG("Extracting hard link `%s' => `%s'",
147 output_path, inode->extracted_file);
148 if (link(inode->extracted_file, output_path) != 0) {
149 ERROR_WITH_ERRNO("Failed to hard link "
152 inode->extracted_file);
153 return WIMLIB_ERR_LINK;
/* Record a private copy of this output path on the inode. */
157 FREE(inode->extracted_file);
158 inode->extracted_file = STRDUP(output_path);
159 if (!inode->extracted_file) {
160 ERROR("Failed to allocate memory for filename");
161 return WIMLIB_ERR_NOMEM;
166 /* Extract the contents of the file to @output_path. */
/* Create the file, or truncate an existing one; requested mode is 0644. */
168 out_fd = open(output_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
170 ERROR_WITH_ERRNO("Failed to open the file `%s' for writing",
172 return WIMLIB_ERR_OPEN;
176 /* Empty file with no lookup table entry */
177 DEBUG("Empty file `%s'.", output_path);
182 ret = extract_full_wim_resource_to_fd(lte, out_fd);
184 ERROR("Failed to extract resource to `%s'", output_path);
/* A failing close() is reported as a write error. */
189 if (close(out_fd) != 0) {
190 ERROR_WITH_ERRNO("Failed to close file `%s'", output_path);
191 ret = WIMLIB_ERR_WRITE;
197 * Extracts a regular file from the WIM archive.
/*
 * Extracts the regular file @dentry to @output_path.
 *
 * The unnamed stream of the dentry's inode is looked up in w->lookup_table.
 * When WIMLIB_EXTRACT_FLAG_SYMLINK or WIMLIB_EXTRACT_FLAG_HARDLINK is set and
 * the stream exists:
 *   - if lte->extracted_file is already set, the result of
 *     extract_regular_file_linked() is returned;
 *   - otherwise a copy of @output_path is stored in lte->extracted_file
 *     (WIMLIB_ERR_NOMEM if the copy cannot be allocated).
 * The function ends by returning extract_regular_file_unlinked().
 */
199 static int extract_regular_file(WIMStruct *w,
200 struct dentry *dentry,
201 const char *output_dir,
202 const char *output_path,
205 struct lookup_table_entry *lte;
206 const struct inode *inode = dentry->d_inode;
/* The `&& lte` test below shows that lte may be NULL here. */
208 lte = inode_unnamed_lte(inode, w->lookup_table);
210 if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
211 WIMLIB_EXTRACT_FLAG_HARDLINK)) && lte) {
212 if (lte->extracted_file) {
213 return extract_regular_file_linked(dentry, output_dir,
/* No recorded path yet: store this one on the stream entry. */
217 lte->extracted_file = STRDUP(output_path);
218 if (!lte->extracted_file)
219 return WIMLIB_ERR_NOMEM;
223 return extract_regular_file_unlinked(w, dentry, output_path,
/*
 * Extracts the symbolic link @dentry to @output_path.
 *
 * The link target is read with inode_readlink() into the local buffer
 * `target`, then symlink(target, output_path) is called.
 *
 * Returns WIMLIB_ERR_INVALID_DENTRY if the target could not be read and
 * WIMLIB_ERR_LINK if symlink() fails.
 * NOTE(review): the declaration of `target`, the conditions guarding the
 * error branches and the success return are not visible in this listing.
 */
228 static int extract_symlink(const struct dentry *dentry, const char *output_path,
232 ssize_t ret = inode_readlink(dentry->d_inode, target,
233 sizeof(target), w, 0);
235 ERROR("Could not read the symbolic link from dentry `%s'",
236 dentry->full_path_utf8);
237 return WIMLIB_ERR_INVALID_DENTRY;
/* ret is reused to hold the result of symlink(). */
239 ret = symlink(target, output_path);
241 ERROR_WITH_ERRNO("Failed to symlink `%s' to `%s'",
242 output_path, target);
243 return WIMLIB_ERR_LINK;
249 * Extracts a directory from the WIM archive.
251 * @output_path: The path at which the directory is to be created.
252 * @is_root: Passed as true for the top-level output directory or the root dentry.
253 * @return: An int status; WIMLIB_ERR_MKDIR or WIMLIB_ERR_STAT on failure.
/*
 * Ensures that a directory exists at @output_path.
 *
 * The path is examined with stat() and, if needed, created with mkdir()
 * using permissions rwxr-xr-x.
 *
 * Error codes visible here: WIMLIB_ERR_MKDIR (the path exists but is not a
 * directory, or mkdir() failed) and WIMLIB_ERR_STAT (stat() failed with an
 * errno other than ENOENT).
 * NOTE(review): no use of @is_root is visible in this listing; the only
 * related line is a commented-out warning.
 */
255 static int extract_directory(const char *output_path, bool is_root)
259 ret = stat(output_path, &stbuf);
261 if (S_ISDIR(stbuf.st_mode)) {
263 /*WARNING("`%s' already exists", output_path);*/
266 ERROR("`%s' is not a directory", output_path);
267 return WIMLIB_ERR_MKDIR;
/* Only ENOENT is tolerated from a failed stat(); anything else is an
 * error. */
270 if (errno != ENOENT) {
271 ERROR_WITH_ERRNO("Failed to stat `%s'", output_path);
272 return WIMLIB_ERR_STAT;
275 /* No entry exists at output_path yet; create the directory. */
276 if (mkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP |
277 S_IROTH | S_IXOTH) != 0) {
278 ERROR_WITH_ERRNO("Cannot create directory `%s'",
280 return WIMLIB_ERR_MKDIR;
/*
 * State handed to the dentry-tree callbacks (extract_dentry(),
 * apply_dentry_timestamps()) through their void * argument.
 * NOTE(review): those callbacks also read args->w and args->extract_flags;
 * the declarations of these members are not visible in this listing.
 */
285 struct extract_args {
/* Prefix prepended to each dentry's full path to form the output path. */
288 const char *output_dir;
/* Counter incremented in apply_dentry_timestamps() when setting a
 * timestamp fails. */
289 unsigned num_lutimes_warnings;
293 * Extracts a file, directory, or symbolic link from the WIM archive. For use
294 * in for_dentry_in_tree().
/*
 * Tree-walk callback: extracts one dentry.  @arg is a struct extract_args.
 *
 * The output path is args->output_dir followed by the dentry's full path.
 * The dentry is then dispatched by type: symbolic link, directory, or
 * (otherwise) regular file, and the result of the matching helper is
 * returned.
 */
296 static int extract_dentry(struct dentry *dentry, void *arg)
298 struct extract_args *args = arg;
299 WIMStruct *w = args->w;
300 int extract_flags = args->extract_flags;
301 size_t len = strlen(args->output_dir);
/* Variable-length stack buffer: prefix + dentry path + terminating NUL. */
302 char output_path[len + dentry->full_path_utf8_len + 1];
/* NOTE(review): the statement guarded by these two conditions is missing
 * from the listing -- presumably dentries that have an unnamed stream are
 * skipped when WIMLIB_EXTRACT_FLAG_NO_STREAMS is set; confirm. */
304 if (extract_flags & WIMLIB_EXTRACT_FLAG_NO_STREAMS)
305 if (inode_unnamed_lte(dentry->d_inode, w->lookup_table) != NULL)
/* Verbose mode prints the dentry's path inside the image, not the output
 * path. */
308 if (extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) {
309 wimlib_assert(dentry->full_path_utf8);
310 puts(dentry->full_path_utf8);
/* Concatenate with memcpy() and terminate explicitly. */
313 memcpy(output_path, args->output_dir, len);
314 memcpy(output_path + len, dentry->full_path_utf8, dentry->full_path_utf8_len);
315 output_path[len + dentry->full_path_utf8_len] = '\0';
318 if (dentry_is_symlink(dentry))
319 return extract_symlink(dentry, output_path, w);
320 else if (dentry_is_directory(dentry))
321 return extract_directory(output_path, dentry_is_root(dentry));
323 return extract_regular_file(w, dentry, args->output_dir,
324 output_path, extract_flags);
327 /* Apply timestamp to extracted file */
/*
 * Tree-walk callback; @arg is a struct extract_args.
 *
 * Rebuilds the output path (args->output_dir + the dentry's full path) and
 * sets its access time from inode->last_access_time and its modification
 * time from inode->last_write_time using lutimes().  When lutimes() reports
 * ENOSYS, utime() is tried with the same two timestamps.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
328 static int apply_dentry_timestamps(struct dentry *dentry, void *arg)
330 struct extract_args *args = arg;
331 size_t len = strlen(args->output_dir);
/* Variable-length stack buffer: prefix + dentry path + terminating NUL. */
332 char output_path[len + dentry->full_path_utf8_len + 1];
333 const struct inode *inode = dentry->d_inode;
336 memcpy(output_path, args->output_dir, len);
337 memcpy(output_path + len, dentry->full_path_utf8, dentry->full_path_utf8_len);
338 output_path[len + dentry->full_path_utf8_len] = '\0';
/* tv[0] carries the access time, tv[1] the modification time. */
340 struct timeval tv[2];
341 wim_timestamp_to_timeval(inode->last_access_time, &tv[0]);
342 wim_timestamp_to_timeval(inode->last_write_time, &tv[1]);
344 ret = lutimes(output_path, tv);
/* Fallback when lutimes() is not implemented. */
351 if (errno == ENOSYS) {
353 buf.actime = wim_timestamp_to_unix(inode->last_access_time);
354 buf.modtime = wim_timestamp_to_unix(inode->last_write_time);
355 if (utime(output_path, &buf) == 0)
/* The warning itself is commented out; only the counter is updated, for
 * non-ENOSYS errors or while fewer than 10 have been counted. */
359 if (errno != ENOSYS || args->num_lutimes_warnings < 10) {
360 /*WARNING("Failed to set timestamp on file `%s': %s",*/
361 /*output_path, strerror(errno));*/
362 args->num_lutimes_warnings++;
/*
 * Tree-walk callback that groups dentries by the stream they reference.
 *
 * The unnamed stream entry of the dentry's inode is looked up.  The first
 * time a stream is seen (out_refcnt becomes 1) its dentry list is
 * initialised and the entry is appended to the stream list stored in
 * w->private.  The dentry is appended to the stream's dentry list through
 * its tmp_list member.
 *
 * NOTE(review): `<e->` below looks like `&lte->` damaged by an HTML-entity
 * round trip ("&lt;" turned into "<"); the lines do not compile as shown.
 * NOTE(review): the second parameter, the definition of `w`, any NULL check
 * on lte and the return statement are not visible in this listing.
 */
369 static int dentry_add_streams_for_extraction(struct dentry *dentry,
373 struct list_head *stream_list;
374 struct lookup_table_entry *lte;
376 lte = inode_unnamed_lte(dentry->d_inode, w->lookup_table);
/* First reference to this stream during the current walk. */
378 if (++lte->out_refcnt == 1) {
379 INIT_LIST_HEAD(<e->dentry_list);
380 stream_list = w->private;
381 list_add_tail(<e->staging_list, stream_list);
383 list_add_tail(&dentry->tmp_list, <e->dentry_list);
/*
 * qsort() comparator for an array of `struct lookup_table_entry *`.
 * Entries are compared by resource_entry.offset.
 *
 * NOTE(review): the returned values are on lines missing from this listing
 * -- presumably negative / positive / zero for ascending offset order;
 * confirm.
 */
388 static int cmp_streams_by_wim_position(const void *p1, const void *p2)
390 const struct lookup_table_entry *lte1, *lte2;
/* p1 and p2 point at array elements, which are themselves pointers. */
391 lte1 = *(const struct lookup_table_entry**)p1;
392 lte2 = *(const struct lookup_table_entry**)p2;
393 if (lte1->resource_entry.offset < lte2->resource_entry.offset)
395 else if (lte1->resource_entry.offset > lte2->resource_entry.offset)
/*
 * Sorts the stream entries linked on @stream_list (through their
 * staging_list member) with cmp_streams_by_wim_position().
 *
 * The list is copied into a temporary array of entry pointers, the array is
 * sorted with qsort(), and the list is rebuilt from the sorted array.
 *
 * Returns WIMLIB_ERR_NOMEM if the temporary array cannot be allocated.
 * NOTE(review): the release of `array` and the success return are not
 * visible in this listing -- confirm the array is freed.
 */
401 static int sort_stream_list_by_wim_position(struct list_head *stream_list)
403 struct list_head *cur;
405 struct lookup_table_entry **array;
409 DEBUG("Sorting stream list by wim position");
/* First pass over the list; presumably counts the entries into
 * num_streams (loop body not visible). */
412 list_for_each(cur, stream_list)
414 array_size = num_streams * sizeof(array[0]);
416 DEBUG("num_streams = %zu", num_streams);
418 array = MALLOC(array_size);
420 ERROR("Failed to allocate %zu bytes to sort stream entries",
422 return WIMLIB_ERR_NOMEM;
/* Second pass: recover each entry from its embedded list node.
 * NOTE(review): the statement advancing `cur` is not visible here. */
424 cur = stream_list->next;
425 for (i = 0; i < num_streams; i++) {
426 array[i] = container_of(cur, struct lookup_table_entry, staging_list);
430 qsort(array, num_streams, sizeof(array[0]), cmp_streams_by_wim_position);
/* Empty the list head and re-link the entries in sorted order. */
432 INIT_LIST_HEAD(stream_list);
433 for (i = 0; i < num_streams; i++)
434 list_add_tail(&array[i]->staging_list, stream_list);
/*
 * Computes a u64 byte total over the streams on @stream_list, based on
 * wim_resource_size() of each entry.
 *
 * For each stream the `found` flag is first cleared on the inode of every
 * dentry in the stream's dentry list.  The list is then walked again and
 * each inode is marked the first time it is encountered, so each distinct
 * inode is visited once per stream.
 *
 * NOTE(review): the accumulation statements, the second parameter and the
 * return are missing from this listing.  Presumably the size is counted
 * once per stream when the SYMLINK/HARDLINK flags are set and once per
 * distinct inode otherwise -- confirm.
 * NOTE(review): `<e->` below looks like a mangled `&lte->`.
 */
439 static u64 calculate_bytes_to_extract(struct list_head *stream_list,
442 struct lookup_table_entry *lte;
443 struct dentry *dentry;
445 list_for_each_entry(lte, stream_list, staging_list) {
446 u64 size = wim_resource_size(lte);
448 (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
/* Pass 1: reset the per-inode marker. */
452 list_for_each_entry(dentry, <e->dentry_list,
455 dentry->d_inode->found = false;
/* Pass 2: act once per inode not yet marked. */
457 list_for_each_entry(dentry, <e->dentry_list,
460 if (!dentry->d_inode->found) {
461 dentry->d_inode->found = true;
/*
 * Extracts image number @image of @w into @output_dir.
 *
 * Steps visible here:
 *   1. Select the image and fetch its root dentry.
 *   2. Walk the tree with extract_dentry().  In sequential mode
 *      (WIMLIB_EXTRACT_FLAG_SEQUENTIAL) this first walk runs with
 *      WIMLIB_EXTRACT_FLAG_NO_STREAMS set.
 *   3. In sequential mode only: collect the streams referenced by the tree,
 *      sort them by position in the WIM, and call extract_dentry() for every
 *      dentry of every stream in that order.
 *   4. Apply timestamps with a second tree walk
 *      (for_dentry_in_tree_depth() with apply_dentry_timestamps()).
 *
 * NOTE(review): many lines of this function (declarations, error checks,
 * closing braces) are missing from the listing; the comments below describe
 * only the statements that are visible.
 * NOTE(review): `<e->` below looks like a mangled `&lte->`.
 */
470 static int extract_single_image(WIMStruct *w, int image,
471 const char *output_dir, int extract_flags)
475 const char *image_name;
477 DEBUG("Extracting image %d", image);
479 ret = select_wim_image(w, image);
483 root = wim_root_dentry(w);
485 struct extract_args args = {
487 .extract_flags = extract_flags,
488 .output_dir = output_dir,
489 .num_lutimes_warnings = 0,
/* The name is used only in the progress messages below; "unnamed" is a
 * fallback (its condition is not visible here). */
492 image_name = wimlib_get_image_name(w, image);
494 image_name = "unnamed";
496 if (extract_flags & WIMLIB_EXTRACT_FLAG_SEQUENTIAL) {
/* Reset the out_refcnt counters that
 * dentry_add_streams_for_extraction() increments later. */
497 for_lookup_table_entry(w->lookup_table, lte_zero_out_refcnt,
/* Only the copy in args is modified; extract_flags keeps the caller's
 * value. */
499 args.extract_flags |= WIMLIB_EXTRACT_FLAG_NO_STREAMS;
500 if (args.extract_flags & WIMLIB_EXTRACT_FLAG_SHOW_PROGRESS) {
501 printf("Creating directory structure for image %d (%s)...\n",
505 if (args.extract_flags & WIMLIB_EXTRACT_FLAG_SHOW_PROGRESS) {
506 printf("Extracting image %d (%s)...\n",
511 ret = for_dentry_in_tree(root, extract_dentry, &args);
515 if (extract_flags & WIMLIB_EXTRACT_FLAG_SEQUENTIAL) {
516 struct list_head stream_list;
517 struct lookup_table_entry *lte;
518 struct lookup_table_entry *tmp;
519 struct dentry *dentry;
524 unsigned cur_percent;
/* w->private carries the list head into
 * dentry_add_streams_for_extraction(). */
526 INIT_LIST_HEAD(&stream_list);
527 w->private = &stream_list;
528 for_dentry_in_tree(root, dentry_add_streams_for_extraction, w);
529 ret = sort_stream_list_by_wim_position(&stream_list);
/* From here on extract_dentry() runs without the NO_STREAMS flag. */
530 args.extract_flags &= ~WIMLIB_EXTRACT_FLAG_NO_STREAMS;
/* Fallback: a plain tree walk.  NOTE(review): presumably taken when the
 * sort above failed; the condition is not visible. */
532 WARNING("Falling back to non-sequential image extraction");
533 ret = for_dentry_in_tree(root, extract_dentry, &args);
539 total_size = calculate_bytes_to_extract(&stream_list, args.extract_flags);
540 one_percent = total_size / 100;
544 puts("Extracting files...");
/* The _safe iterator is used because each entry is unlinked from the list
 * while iterating. */
545 list_for_each_entry_safe(lte, tmp, &stream_list, staging_list) {
546 list_del(<e->staging_list);
547 list_for_each_entry(dentry, <e->dentry_list, tmp_list) {
/* Progress is reported only for dentries whose inode has no recorded
 * extracted_file, and only when SHOW_PROGRESS is set. */
548 if ((!dentry->d_inode->extracted_file) &&
549 (args.extract_flags & WIMLIB_EXTRACT_FLAG_SHOW_PROGRESS))
551 show_stream_op_progress(&cur_size, &next_size,
552 total_size, one_percent,
556 ret = extract_dentry(dentry, &args);
561 finish_stream_op_progress(total_size, "extracted");
564 return for_dentry_in_tree_depth(root, apply_dentry_timestamps, &args);
568 /* Extracts all images from the WIM to @output_dir, with the images placed in
569 * subdirectories named by their image names. */
/*
 * @output_dir itself is created first via extract_directory().  For each
 * image 1..w->hdr.image_count the path "<output_dir>/<name>" is built in
 * `buf` and passed to extract_single_image(); the decimal image number
 * replaces an empty image name.
 *
 * NOTE(review): the error checks after extract_directory() and
 * extract_single_image() and the final return are not visible in this
 * listing.
 */
570 static int extract_all_images(WIMStruct *w, const char *output_dir,
/* At least 20 bytes are reserved for the name part, so the "%d" fallback
 * always fits. */
573 size_t image_name_max_len = max(xml_get_max_image_name_len(w), 20);
574 size_t output_path_len = strlen(output_dir);
/* Variable-length stack buffer: output_dir + '/' + name + terminating NUL. */
575 char buf[output_path_len + 1 + image_name_max_len + 1];
578 const char *image_name;
580 DEBUG("Attempting to extract all images from `%s' to `%s'",
581 w->filename, output_dir);
583 ret = extract_directory(output_dir, true);
/* The "<output_dir>/" prefix is written once; only the name part is
 * rewritten per image. */
587 memcpy(buf, output_dir, output_path_len);
588 buf[output_path_len] = '/';
589 for (image = 1; image <= w->hdr.image_count; image++) {
591 image_name = wimlib_get_image_name(w, image);
/* Relies on image_name being no longer than
 * xml_get_max_image_name_len(w). */
593 strcpy(buf + output_path_len + 1, image_name);
595 /* Image name is empty. Use image number instead */
596 sprintf(buf + output_path_len + 1, "%d", image);
598 ret = extract_single_image(w, image, buf, extract_flags);
605 /* Extracts a single image or all images from a WIM file. */
/*
 * Public entry point.
 *
 * Visible behaviour:
 *   - Returns WIMLIB_ERR_INVALID_PARAM when @w or @output_dir is NULL, or
 *     when both WIMLIB_EXTRACT_FLAG_SYMLINK and WIMLIB_EXTRACT_FLAG_HARDLINK
 *     are set.
 *   - Flags outside WIMLIB_EXTRACT_MASK_PUBLIC are dropped.
 *   - The split-WIM set is checked with verify_swm_set().
 *   - With additional parts, w->lookup_table is temporarily replaced by a
 *     joined table, which is freed and swapped back afterwards.
 *   - In a link mode, extracted_file is reset on every lookup table entry
 *     before extraction and freed afterwards, and
 *     WIMLIB_EXTRACT_FLAG_SEQUENTIAL is cleared.
 *   - WIM_ALL_IMAGES selects extract_all_images() with
 *     WIMLIB_EXTRACT_FLAG_MULTI_IMAGE set; any other value goes to
 *     extract_single_image().
 *
 * NOTE(review): the DEBUG() call below dereferences w (w->filename) before
 * the `!w` check, so a NULL @w is dereferenced whenever DEBUG() evaluates
 * its arguments.  The check should come first.
 * NOTE(review): this definition continues past the end of the listing, and
 * several interior lines (error checks, braces) are missing.
 */
606 WIMLIBAPI int wimlib_extract_image(WIMStruct *w, int image,
607 const char *output_dir,
609 WIMStruct **additional_swms,
610 unsigned num_additional_swms)
612 struct lookup_table *joined_tab, *w_tab_save;
615 DEBUG("w->filename = %s, image = %d, output_dir = %s, flags = 0x%x, "
616 "num_additional_swms = %u",
617 w->filename, image, output_dir, extract_flags, num_additional_swms);
619 if (!w || !output_dir)
620 return WIMLIB_ERR_INVALID_PARAM;
622 extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC;
/* The two link modes are mutually exclusive. */
624 if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
625 == (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
626 return WIMLIB_ERR_INVALID_PARAM;
628 ret = verify_swm_set(w, additional_swms, num_additional_swms);
632 if (num_additional_swms) {
633 ret = new_joined_lookup_table(w, additional_swms,
634 num_additional_swms, &joined_tab);
/* Save the original table so it can be restored after extraction. */
637 w_tab_save = w->lookup_table;
638 w->lookup_table = joined_tab;
641 if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
642 WIMLIB_EXTRACT_FLAG_HARDLINK))
644 for_lookup_table_entry(w->lookup_table,
645 lte_zero_extracted_file,
647 extract_flags &= ~WIMLIB_EXTRACT_FLAG_SEQUENTIAL;
650 if (image == WIM_ALL_IMAGES) {
651 extract_flags |= WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
652 ret = extract_all_images(w, output_dir, extract_flags);
654 extract_flags &= ~WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
655 ret = extract_single_image(w, image, output_dir, extract_flags);
/* Undo the lookup table swap made above. */
657 if (num_additional_swms) {
658 free_lookup_table(w->lookup_table);
659 w->lookup_table = w_tab_save;
662 if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
663 WIMLIB_EXTRACT_FLAG_HARDLINK))
665 for_lookup_table_entry(w->lookup_table,
666 lte_free_extracted_file,