90b6ce39eca37c69e36b420a6a775c65c21c8eab
[wimlib] / src / extract.c
1 /*
2  * extract.c
3  *
4  * Support for extracting WIM files.
5  *
6  * This code does NOT contain any filesystem-specific features.  In particular,
7  * security information (i.e. file permissions) and alternate data streams are
8  * ignored, except possibly to read an alternate data stream that contains
9  * symbolic link data.
10  */
11
12 /*
13  * Copyright (C) 2010 Carl Thijssen
14  * Copyright (C) 2012 Eric Biggers
15  *
16  * This file is part of wimlib, a library for working with WIM files.
17  *
18  * wimlib is free software; you can redistribute it and/or modify it under the
19  * terms of the GNU General Public License as published by the Free
20  * Software Foundation; either version 3 of the License, or (at your option)
21  * any later version.
22  *
23  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
24  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
25  * A PARTICULAR PURPOSE. See the GNU General Public License for more
26  * details.
27  *
28  * You should have received a copy of the GNU General Public License
29  * along with wimlib; if not, see http://www.gnu.org/licenses/.
30  */
31
32
33 #include "config.h"
34
35 #include <dirent.h>
36 #include <errno.h>
37 #include <fcntl.h>
38 #include <string.h>
39 #include <sys/stat.h>
40 #include <stdlib.h>
41 #include <sys/time.h>
42
43 #ifdef HAVE_UTIME_H
44 #include <utime.h>
45 #endif
46
47 #include <unistd.h>
48
49 #include "dentry.h"
50 #include "lookup_table.h"
51 #include "timestamp.h"
52 #include "wimlib_internal.h"
53 #include "xml.h"
54
55
56 static int extract_regular_file_linked(const struct dentry *dentry,
57                                        const char *output_dir,
58                                        const char *output_path,
59                                        int extract_flags,
60                                        struct lookup_table_entry *lte)
61 {
62         /* This mode overrides the normal hard-link extraction and
63          * instead either symlinks or hardlinks *all* identical files in
64          * the WIM, even if they are in a different image (in the case
65          * of a multi-image extraction) */
66         wimlib_assert(lte->extracted_file != NULL);
67
68         if (extract_flags & WIMLIB_EXTRACT_FLAG_HARDLINK) {
69                 if (link(lte->extracted_file, output_path) != 0) {
70                         ERROR_WITH_ERRNO("Failed to hard link "
71                                          "`%s' to `%s'",
72                                          output_path, lte->extracted_file);
73                         return WIMLIB_ERR_LINK;
74                 }
75         } else {
76                 int num_path_components;
77                 int num_output_dir_path_components;
78                 size_t extracted_file_len;
79                 char *p;
80                 const char *p2;
81                 size_t i;
82
83                 wimlib_assert(extract_flags & WIMLIB_EXTRACT_FLAG_SYMLINK);
84
85                 num_path_components =
86                         get_num_path_components(dentry->full_path_utf8) - 1;
87                 num_output_dir_path_components =
88                         get_num_path_components(output_dir);
89
90                 if (extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE) {
91                         num_path_components++;
92                         num_output_dir_path_components--;
93                 }
94                 extracted_file_len = strlen(lte->extracted_file);
95
96                 char buf[extracted_file_len + 3 * num_path_components + 1];
97                 p = &buf[0];
98
99                 for (i = 0; i < num_path_components; i++) {
100                         *p++ = '.';
101                         *p++ = '.';
102                         *p++ = '/';
103                 }
104                 p2 = lte->extracted_file;
105                 while (*p2 == '/')
106                         p2++;
107                 while (num_output_dir_path_components--)
108                         p2 = path_next_part(p2, NULL);
109                 strcpy(p, p2);
110                 if (symlink(buf, output_path) != 0) {
111                         ERROR_WITH_ERRNO("Failed to symlink `%s' to "
112                                          "`%s'",
113                                          buf, lte->extracted_file);
114                         return WIMLIB_ERR_LINK;
115                 }
116
117         }
118         return 0;
119 }
120
121 static int extract_regular_file_unlinked(WIMStruct *w,
122                                          struct dentry *dentry,
123                                          const char *output_path,
124                                          int extract_flags,
125                                          struct lookup_table_entry *lte)
126 {
127         /* Normal mode of extraction.  Regular files and hard links are
128          * extracted in the way that they appear in the WIM. */
129
130         int out_fd;
131         int ret;
132         struct inode *inode = dentry->d_inode;
133
134         if (!((extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE)
135                 && (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
136                                      WIMLIB_EXTRACT_FLAG_HARDLINK))))
137         {
138                 /* If the dentry is one of a hard link set of at least 2
139                  * dentries and one of the other dentries has already been
140                  * extracted, make a hard link to the file corresponding to this
141                  * already-extracted directory.  Otherwise, extract the file,
142                  * and set the inode->extracted_file field so that other
143                  * dentries in the hard link group can link to it. */
144                 if (inode->link_count > 1) {
145                         if (inode->extracted_file) {
146                                 DEBUG("Extracting hard link `%s' => `%s'",
147                                       output_path, inode->extracted_file);
148                                 if (link(inode->extracted_file, output_path) != 0) {
149                                         ERROR_WITH_ERRNO("Failed to hard link "
150                                                          "`%s' to `%s'",
151                                                          output_path,
152                                                          inode->extracted_file);
153                                         return WIMLIB_ERR_LINK;
154                                 }
155                                 return 0;
156                         }
157                         FREE(inode->extracted_file);
158                         inode->extracted_file = STRDUP(output_path);
159                         if (!inode->extracted_file) {
160                                 ERROR("Failed to allocate memory for filename");
161                                 return WIMLIB_ERR_NOMEM;
162                         }
163                 }
164         }
165
166         /* Extract the contents of the file to @output_path. */
167
168         out_fd = open(output_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
169         if (out_fd == -1) {
170                 ERROR_WITH_ERRNO("Failed to open the file `%s' for writing",
171                                  output_path);
172                 return WIMLIB_ERR_OPEN;
173         }
174
175         if (!lte) {
176                 /* Empty file with no lookup table entry */
177                 DEBUG("Empty file `%s'.", output_path);
178                 ret = 0;
179                 goto out;
180         }
181
182         ret = extract_full_wim_resource_to_fd(lte, out_fd);
183         if (ret != 0) {
184                 ERROR("Failed to extract resource to `%s'", output_path);
185                 goto out;
186         }
187
188 out:
189         if (close(out_fd) != 0) {
190                 ERROR_WITH_ERRNO("Failed to close file `%s'", output_path);
191                 ret = WIMLIB_ERR_WRITE;
192         }
193         return ret;
194 }
195
196 /*
197  * Extracts a regular file from the WIM archive.
198  */
199 static int extract_regular_file(WIMStruct *w,
200                                 struct dentry *dentry,
201                                 const char *output_dir,
202                                 const char *output_path,
203                                 int extract_flags)
204 {
205         struct lookup_table_entry *lte;
206         const struct inode *inode = dentry->d_inode;
207
208         lte = inode_unnamed_lte(inode, w->lookup_table);
209
210         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
211                               WIMLIB_EXTRACT_FLAG_HARDLINK)) && lte) {
212                 if (lte->extracted_file) {
213                         return extract_regular_file_linked(dentry, output_dir,
214                                                            output_path,
215                                                            extract_flags, lte);
216                 } else {
217                         lte->extracted_file = STRDUP(output_path);
218                         if (!lte->extracted_file)
219                                 return WIMLIB_ERR_NOMEM;
220                 }
221         }
222
223         return extract_regular_file_unlinked(w, dentry, output_path,
224                                              extract_flags, lte);
225
226 }
227
228 static int extract_symlink(const struct dentry *dentry, const char *output_path,
229                            const WIMStruct *w)
230 {
231         char target[4096];
232         ssize_t ret = inode_readlink(dentry->d_inode, target,
233                                      sizeof(target), w, 0);
234         if (ret <= 0) {
235                 ERROR("Could not read the symbolic link from dentry `%s'",
236                       dentry->full_path_utf8);
237                 return WIMLIB_ERR_INVALID_DENTRY;
238         }
239         ret = symlink(target, output_path);
240         if (ret != 0) {
241                 ERROR_WITH_ERRNO("Failed to symlink `%s' to `%s'",
242                                  output_path, target);
243                 return WIMLIB_ERR_LINK;
244         }
245         return 0;
246 }
247
248 /*
249  * Extracts a directory from the WIM archive.
250  *
251  * @dentry:             The directory entry for the directory.
252  * @output_path:        The path to which the directory is to be extracted to.
253  * @return:             True on success, false on failure.
254  */
255 static int extract_directory(const char *output_path, bool is_root)
256 {
257         int ret;
258         struct stat stbuf;
259         ret = stat(output_path, &stbuf);
260         if (ret == 0) {
261                 if (S_ISDIR(stbuf.st_mode)) {
262                         /*if (!is_root)*/
263                                 /*WARNING("`%s' already exists", output_path);*/
264                         return 0;
265                 } else {
266                         ERROR("`%s' is not a directory", output_path);
267                         return WIMLIB_ERR_MKDIR;
268                 }
269         } else {
270                 if (errno != ENOENT) {
271                         ERROR_WITH_ERRNO("Failed to stat `%s'", output_path);
272                         return WIMLIB_ERR_STAT;
273                 }
274         }
275         /* Compute the output path directory to the directory. */
276         if (mkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP |
277                                S_IROTH | S_IXOTH) != 0) {
278                 ERROR_WITH_ERRNO("Cannot create directory `%s'",
279                                  output_path);
280                 return WIMLIB_ERR_MKDIR;
281         }
282         return 0;
283 }
284
285 struct extract_args {
286         WIMStruct *w;
287         int extract_flags;
288         const char *output_dir;
289         unsigned num_lutimes_warnings;
290 };
291
292 /*
293  * Extracts a file, directory, or symbolic link from the WIM archive.  For use
294  * in for_dentry_in_tree().
295  */
296 static int extract_dentry(struct dentry *dentry, void *arg)
297 {
298         struct extract_args *args = arg;
299         WIMStruct *w = args->w;
300         int extract_flags = args->extract_flags;
301         size_t len = strlen(args->output_dir);
302         char output_path[len + dentry->full_path_utf8_len + 1];
303
304         if (extract_flags & WIMLIB_EXTRACT_FLAG_NO_STREAMS)
305                 if (inode_unnamed_lte(dentry->d_inode, w->lookup_table) != NULL)
306                         return 0;
307
308         if (extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) {
309                 wimlib_assert(dentry->full_path_utf8);
310                 puts(dentry->full_path_utf8);
311         }
312
313         memcpy(output_path, args->output_dir, len);
314         memcpy(output_path + len, dentry->full_path_utf8, dentry->full_path_utf8_len);
315         output_path[len + dentry->full_path_utf8_len] = '\0';
316
317
318         if (dentry_is_symlink(dentry))
319                 return extract_symlink(dentry, output_path, w);
320         else if (dentry_is_directory(dentry))
321                 return extract_directory(output_path, dentry_is_root(dentry));
322         else
323                 return extract_regular_file(w, dentry, args->output_dir,
324                                             output_path, extract_flags);
325 }
326
327 /* Apply timestamp to extracted file */
328 static int apply_dentry_timestamps(struct dentry *dentry, void *arg)
329 {
330         struct extract_args *args = arg;
331         size_t len = strlen(args->output_dir);
332         char output_path[len + dentry->full_path_utf8_len + 1];
333         const struct inode *inode = dentry->d_inode;
334         int ret;
335
336         memcpy(output_path, args->output_dir, len);
337         memcpy(output_path + len, dentry->full_path_utf8, dentry->full_path_utf8_len);
338         output_path[len + dentry->full_path_utf8_len] = '\0';
339
340         struct timeval tv[2];
341         wim_timestamp_to_timeval(inode->last_access_time, &tv[0]);
342         wim_timestamp_to_timeval(inode->last_write_time, &tv[1]);
343         #ifdef HAVE_LUTIMES
344         ret = lutimes(output_path, tv);
345         #else
346         ret = -1;
347         errno = ENOSYS;
348         #endif
349         if (ret != 0) {
350                 #ifdef HAVE_UTIME
351                 if (errno == ENOSYS) {
352                         struct utimbuf buf;
353                         buf.actime = wim_timestamp_to_unix(inode->last_access_time);
354                         buf.modtime = wim_timestamp_to_unix(inode->last_write_time);
355                         if (utime(output_path, &buf) == 0)
356                                 return 0;
357                 }
358                 #endif
359                 if (errno != ENOSYS || args->num_lutimes_warnings < 10) {
360                         /*WARNING("Failed to set timestamp on file `%s': %s",*/
361                                 /*output_path, strerror(errno));*/
362                         args->num_lutimes_warnings++;
363                 }
364         }
365         return 0;
366 }
367
368
369 static int dentry_add_streams_for_extraction(struct dentry *dentry,
370                                              void *wim)
371 {
372         WIMStruct *w = wim;
373         struct list_head *stream_list;
374         struct lookup_table_entry *lte;
375
376         lte = inode_unnamed_lte(dentry->d_inode, w->lookup_table);
377         if (lte) {
378                 if (++lte->out_refcnt == 1) {
379                         INIT_LIST_HEAD(&lte->dentry_list);
380                         stream_list = w->private;
381                         list_add_tail(&lte->staging_list, stream_list);
382                 }
383                 list_add_tail(&dentry->tmp_list, &lte->dentry_list);
384         }
385         return 0;
386 }
387
388 static int cmp_streams_by_wim_position(const void *p1, const void *p2)
389 {
390         const struct lookup_table_entry *lte1, *lte2;
391         lte1 = *(const struct lookup_table_entry**)p1;
392         lte2 = *(const struct lookup_table_entry**)p2;
393         if (lte1->resource_entry.offset < lte2->resource_entry.offset)
394                 return -1;
395         else if (lte1->resource_entry.offset > lte2->resource_entry.offset)
396                 return 1;
397         else
398                 return 0;
399 }
400
401 static int sort_stream_list_by_wim_position(struct list_head *stream_list)
402 {
403         struct list_head *cur;
404         size_t num_streams;
405         struct lookup_table_entry **array;
406         size_t i;
407         size_t array_size;
408
409         DEBUG("Sorting stream list by wim position");
410
411         num_streams = 0;
412         list_for_each(cur, stream_list)
413                 num_streams++;
414         array_size = num_streams * sizeof(array[0]);
415
416         DEBUG("num_streams = %zu", num_streams);
417
418         array = MALLOC(array_size);
419         if (!array) {
420                 ERROR("Failed to allocate %zu bytes to sort stream entries",
421                       array_size);
422                 return WIMLIB_ERR_NOMEM;
423         }
424         cur = stream_list->next;
425         for (i = 0; i < num_streams; i++) {
426                 array[i] = container_of(cur, struct lookup_table_entry, staging_list);
427                 cur = cur->next;
428         }
429
430         qsort(array, num_streams, sizeof(array[0]), cmp_streams_by_wim_position);
431
432         INIT_LIST_HEAD(stream_list);
433         for (i = 0; i < num_streams; i++)
434                 list_add_tail(&array[i]->staging_list, stream_list);
435         FREE(array);
436         return 0;
437 }
438
439 static u64 calculate_bytes_to_extract(struct list_head *stream_list,
440                                       int extract_flags)
441 {
442         struct lookup_table_entry *lte;
443         struct dentry *dentry;
444         u64 total_size = 0;
445         list_for_each_entry(lte, stream_list, staging_list) {
446                 u64 size = wim_resource_size(lte);
447                 if (extract_flags &
448                     (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
449                 {
450                         total_size += size;
451                 } else {
452                         list_for_each_entry(dentry, &lte->dentry_list,
453                                             tmp_list)
454                         {
455                                 dentry->d_inode->found = false;
456                         }
457                         list_for_each_entry(dentry, &lte->dentry_list,
458                                             tmp_list)
459                         {
460                                 if (!dentry->d_inode->found) {
461                                         dentry->d_inode->found = true;
462                                         total_size += size;
463                                 }
464                         }
465                 }
466         }
467         return total_size;
468 }
469
470 static int extract_single_image(WIMStruct *w, int image,
471                                 const char *output_dir, int extract_flags)
472 {
473         int ret;
474         struct dentry *root;
475         const char *image_name;
476
477         DEBUG("Extracting image %d", image);
478
479         ret = select_wim_image(w, image);
480         if (ret != 0)
481                 return ret;
482
483         root = wim_root_dentry(w);
484
485         struct extract_args args = {
486                 .w                    = w,
487                 .extract_flags        = extract_flags,
488                 .output_dir           = output_dir,
489                 .num_lutimes_warnings = 0,
490         };
491
492         image_name = wimlib_get_image_name(w, image);
493         if (!image_name)
494                 image_name = "unnamed";
495
496         if (extract_flags & WIMLIB_EXTRACT_FLAG_SEQUENTIAL) {
497                 for_lookup_table_entry(w->lookup_table, lte_zero_out_refcnt,
498                                        NULL);
499                 args.extract_flags |= WIMLIB_EXTRACT_FLAG_NO_STREAMS;
500                 if (args.extract_flags & WIMLIB_EXTRACT_FLAG_SHOW_PROGRESS) {
501                         printf("Creating directory structure for image %d (%s)...\n",
502                                image, image_name);
503                 }
504         } else {
505                 if (args.extract_flags & WIMLIB_EXTRACT_FLAG_SHOW_PROGRESS) {
506                         printf("Extracting image %d (%s)...\n",
507                                image, image_name);
508                 }
509         }
510
511         ret = for_dentry_in_tree(root, extract_dentry, &args);
512         if (ret != 0)
513                 return ret;
514
515         if (extract_flags & WIMLIB_EXTRACT_FLAG_SEQUENTIAL) {
516                 struct list_head stream_list;
517                 struct lookup_table_entry *lte;
518                 struct lookup_table_entry *tmp;
519                 struct dentry *dentry;
520                 u64 total_size;
521                 u64 cur_size;
522                 u64 next_size;
523                 u64 one_percent;
524                 unsigned cur_percent;
525
526                 INIT_LIST_HEAD(&stream_list);
527                 w->private = &stream_list;
528                 for_dentry_in_tree(root, dentry_add_streams_for_extraction, w);
529                 ret = sort_stream_list_by_wim_position(&stream_list);
530                 args.extract_flags &= ~WIMLIB_EXTRACT_FLAG_NO_STREAMS;
531                 if (ret != 0) {
532                         WARNING("Falling back to non-sequential image extraction");
533                         ret = for_dentry_in_tree(root, extract_dentry, &args);
534                         if (ret != 0)
535                                 return ret;
536                         goto out;
537                 }
538
539                 total_size = calculate_bytes_to_extract(&stream_list, args.extract_flags);
540                 one_percent = total_size / 100;
541                 cur_size = 0;
542                 next_size = 0;
543                 cur_percent = 0;
544                 puts("Extracting files...");
545                 list_for_each_entry_safe(lte, tmp, &stream_list, staging_list) {
546                         list_del(&lte->staging_list);
547                         list_for_each_entry(dentry, &lte->dentry_list, tmp_list) {
548                                 if ((!dentry->d_inode->extracted_file) &&
549                                      (args.extract_flags & WIMLIB_EXTRACT_FLAG_SHOW_PROGRESS))
550                                 {
551                                         show_stream_op_progress(&cur_size, &next_size,
552                                                                 total_size, one_percent,
553                                                                 &cur_percent, lte,
554                                                                 "extracted");
555                                 }
556                                 ret = extract_dentry(dentry, &args);
557                                 if (ret != 0)
558                                         return ret;
559                         }
560                 }
561                 finish_stream_op_progress(total_size, "extracted");
562         }
563 out:
564         return for_dentry_in_tree_depth(root, apply_dentry_timestamps, &args);
565 }
566
567
568 /* Extracts all images from the WIM to @output_dir, with the images placed in
569  * subdirectories named by their image names. */
570 static int extract_all_images(WIMStruct *w, const char *output_dir,
571                               int extract_flags)
572 {
573         size_t image_name_max_len = max(xml_get_max_image_name_len(w), 20);
574         size_t output_path_len = strlen(output_dir);
575         char buf[output_path_len + 1 + image_name_max_len + 1];
576         int ret;
577         int image;
578         const char *image_name;
579
580         DEBUG("Attempting to extract all images from `%s' to `%s'",
581               w->filename, output_dir);
582
583         ret = extract_directory(output_dir, true);
584         if (ret != 0)
585                 return ret;
586
587         memcpy(buf, output_dir, output_path_len);
588         buf[output_path_len] = '/';
589         for (image = 1; image <= w->hdr.image_count; image++) {
590
591                 image_name = wimlib_get_image_name(w, image);
592                 if (*image_name) {
593                         strcpy(buf + output_path_len + 1, image_name);
594                 } else {
595                         /* Image name is empty. Use image number instead */
596                         sprintf(buf + output_path_len + 1, "%d", image);
597                 }
598                 ret = extract_single_image(w, image, buf, extract_flags);
599                 if (ret != 0)
600                         return ret;
601         }
602         return 0;
603 }
604
605 /* Extracts a single image or all images from a WIM file. */
606 WIMLIBAPI int wimlib_extract_image(WIMStruct *w, int image,
607                                    const char *output_dir,
608                                    int extract_flags,
609                                    WIMStruct **additional_swms,
610                                    unsigned num_additional_swms)
611 {
612         struct lookup_table *joined_tab, *w_tab_save;
613         int ret;
614
615         DEBUG("w->filename = %s, image = %d, output_dir = %s, flags = 0x%x, "
616               "num_additional_swms = %u",
617               w->filename, image, output_dir, extract_flags, num_additional_swms);
618
619         if (!w || !output_dir)
620                 return WIMLIB_ERR_INVALID_PARAM;
621
622         extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC;
623
624         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
625                         == (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
626                 return WIMLIB_ERR_INVALID_PARAM;
627
628         ret = verify_swm_set(w, additional_swms, num_additional_swms);
629         if (ret != 0)
630                 return ret;
631
632         if (num_additional_swms) {
633                 ret = new_joined_lookup_table(w, additional_swms,
634                                               num_additional_swms, &joined_tab);
635                 if (ret != 0)
636                         return ret;
637                 w_tab_save = w->lookup_table;
638                 w->lookup_table = joined_tab;
639         }
640
641         if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
642                              WIMLIB_EXTRACT_FLAG_HARDLINK))
643         {
644                 for_lookup_table_entry(w->lookup_table,
645                                        lte_zero_extracted_file,
646                                        NULL);
647                 extract_flags &= ~WIMLIB_EXTRACT_FLAG_SEQUENTIAL;
648         }
649
650         if (image == WIM_ALL_IMAGES) {
651                 extract_flags |= WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
652                 ret = extract_all_images(w, output_dir, extract_flags);
653         } else {
654                 extract_flags &= ~WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
655                 ret = extract_single_image(w, image, output_dir, extract_flags);
656         }
657         if (num_additional_swms) {
658                 free_lookup_table(w->lookup_table);
659                 w->lookup_table = w_tab_save;
660         }
661
662         if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
663                              WIMLIB_EXTRACT_FLAG_HARDLINK))
664         {
665                 for_lookup_table_entry(w->lookup_table,
666                                        lte_free_extracted_file,
667                                        NULL);
668         }
669         return ret;
670
671 }