]> wimlib.net Git - wimlib/blob - src/extract_image.c
e1096647ae5cd7e7346361c0d29abdc6e7deae7e
[wimlib] / src / extract_image.c
1 /*
2  * extract_image.c
3  *
4  * Support for extracting WIM files.
5  */
6
7 /*
8  * Copyright (C) 2012, 2013 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free
14  * Software Foundation; either version 3 of the License, or (at your option)
15  * any later version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more
20  * details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with wimlib; if not, see http://www.gnu.org/licenses/.
24  */
25
26 #include "config.h"
27
28 #include <dirent.h>
29
30 #ifdef __WIN32__
31 #  include "win32.h"
32 #else
33 #  ifdef HAVE_UTIME_H
34 #    include <utime.h>
35 #  endif
36 #  include "timestamp.h"
37 #  include <sys/time.h>
38 #endif
39
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <sys/stat.h>
45 #include <unistd.h>
46
47 #include "dentry.h"
48 #include "lookup_table.h"
49 #include "wimlib_internal.h"
50 #include "xml.h"
51
52 #ifdef WITH_NTFS_3G
53 #  include <ntfs-3g/volume.h>
54 #endif
55
56 #ifdef HAVE_ALLOCA_H
57 #  include <alloca.h>
58 #endif
59
60 #ifndef __WIN32__
61
/*
 * Counts the components of @path, where a component is a maximal run of
 * characters other than '/'.  Leading, trailing and repeated slashes do not
 * produce empty components, so both "" and "/" yield 0.
 */
static unsigned
get_num_path_components(const char *path)
{
        unsigned count = 0;
        int inside_component = 0;

        for (const char *s = path; *s != '\0'; s++) {
                if (*s == '/') {
                        inside_component = 0;
                } else if (!inside_component) {
                        /* First character of a new component */
                        inside_component = 1;
                        count++;
                }
        }
        return count;
}
77
/*
 * Skips the path component at the start of @path together with any slashes
 * that follow it.  Returns a pointer to the start of the next component, or
 * to the terminating null character if there is none.
 */
static const char *
path_next_part(const char *path)
{
        path += strcspn(path, "/");     /* skip the current component */
        path += strspn(path, "/");      /* skip the separator(s) after it */
        return path;
}
87
88 static int
89 extract_regular_file_linked(struct wim_dentry *dentry,
90                             const char *output_path,
91                             struct apply_args *args,
92                             struct wim_lookup_table_entry *lte)
93 {
94         /* This mode overrides the normal hard-link extraction and
95          * instead either symlinks or hardlinks *all* identical files in
96          * the WIM, even if they are in a different image (in the case
97          * of a multi-image extraction) */
98
99         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_HARDLINK) {
100                 if (link(lte->extracted_file, output_path) != 0) {
101                         ERROR_WITH_ERRNO("Failed to hard link "
102                                          "`%s' to `%s'",
103                                          output_path, lte->extracted_file);
104                         return WIMLIB_ERR_LINK;
105                 }
106         } else {
107                 int num_path_components;
108                 int num_output_dir_path_components;
109                 size_t extracted_file_len;
110                 char *p;
111                 const char *p2;
112                 size_t i;
113
114                 num_path_components = get_num_path_components(dentry->_full_path) - 1;
115                 num_output_dir_path_components = get_num_path_components(args->target);
116
117                 if (args->extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE) {
118                         num_path_components++;
119                         num_output_dir_path_components--;
120                 }
121                 extracted_file_len = strlen(lte->extracted_file);
122
123                 char buf[extracted_file_len + 3 * num_path_components + 1];
124                 p = &buf[0];
125
126                 for (i = 0; i < num_path_components; i++) {
127                         *p++ = '.';
128                         *p++ = '.';
129                         *p++ = '/';
130                 }
131                 p2 = lte->extracted_file;
132                 while (*p2 == '/')
133                         p2++;
134                 while (num_output_dir_path_components > 0) {
135                         p2 = path_next_part(p2);
136                         num_output_dir_path_components--;
137                 }
138                 strcpy(p, p2);
139                 if (symlink(buf, output_path) != 0) {
140                         ERROR_WITH_ERRNO("Failed to symlink `%s' to `%s'",
141                                          buf, lte->extracted_file);
142                         return WIMLIB_ERR_LINK;
143                 }
144         }
145         return 0;
146 }
147
148 static int
149 symlink_apply_unix_data(const char *link,
150                         const struct wimlib_unix_data *unix_data)
151 {
152         if (lchown(link, unix_data->uid, unix_data->gid)) {
153                 if (errno == EPERM) {
154                         /* Ignore */
155                         WARNING_WITH_ERRNO("failed to set symlink UNIX owner/group");
156                 } else {
157                         ERROR_WITH_ERRNO("failed to set symlink UNIX owner/group");
158                         return WIMLIB_ERR_INVALID_DENTRY;
159                 }
160         }
161         return 0;
162 }
163
164 static int
165 fd_apply_unix_data(int fd, const struct wimlib_unix_data *unix_data)
166 {
167         if (fchown(fd, unix_data->uid, unix_data->gid)) {
168                 if (errno == EPERM) {
169                         WARNING_WITH_ERRNO("failed to set file UNIX owner/group");
170                         /* Ignore? */
171                 } else {
172                         ERROR_WITH_ERRNO("failed to set file UNIX owner/group");
173                         return WIMLIB_ERR_INVALID_DENTRY;
174                 }
175         }
176
177         if (fchmod(fd, unix_data->mode)) {
178                 if (errno == EPERM) {
179                         WARNING_WITH_ERRNO("failed to set UNIX file mode");
180                         /* Ignore? */
181                 } else {
182                         ERROR_WITH_ERRNO("failed to set UNIX file mode");
183                         return WIMLIB_ERR_INVALID_DENTRY;
184                 }
185         }
186         return 0;
187 }
188
189 static int
190 dir_apply_unix_data(const char *dir, const struct wimlib_unix_data *unix_data)
191 {
192         int dfd = open(dir, O_RDONLY);
193         int ret;
194         if (dfd >= 0) {
195                 ret = fd_apply_unix_data(dfd, unix_data);
196                 if (close(dfd)) {
197                         ERROR_WITH_ERRNO("can't close directory `%s'", dir);
198                         ret = WIMLIB_ERR_MKDIR;
199                 }
200         } else {
201                 ERROR_WITH_ERRNO("can't open directory `%s'", dir);
202                 ret = WIMLIB_ERR_MKDIR;
203         }
204         return ret;
205 }
206
207 static int
208 extract_regular_file_unlinked(struct wim_dentry *dentry,
209                               struct apply_args *args,
210                               const char *output_path,
211                               struct wim_lookup_table_entry *lte)
212 {
213         /* Normal mode of extraction.  Regular files and hard links are
214          * extracted in the way that they appear in the WIM. */
215
216         int out_fd;
217         int ret;
218         struct wim_inode *inode = dentry->d_inode;
219
220         if (!((args->extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE)
221                 && (args->extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
222                                      WIMLIB_EXTRACT_FLAG_HARDLINK))))
223         {
224                 /* If the dentry is part of a hard link set of at least 2
225                  * dentries and one of the other dentries has already been
226                  * extracted, make a hard link to the file corresponding to this
227                  * already-extracted directory.  Otherwise, extract the file and
228                  * set the inode->i_extracted_file field so that other dentries
229                  * in the hard link group can link to it. */
230                 if (inode->i_nlink > 1) {
231                         if (inode->i_extracted_file) {
232                                 DEBUG("Extracting hard link `%s' => `%s'",
233                                       output_path, inode->i_extracted_file);
234                                 if (link(inode->i_extracted_file, output_path) != 0) {
235                                         ERROR_WITH_ERRNO("Failed to hard link "
236                                                          "`%s' to `%s'",
237                                                          output_path,
238                                                          inode->i_extracted_file);
239                                         return WIMLIB_ERR_LINK;
240                                 }
241                                 return 0;
242                         }
243                         FREE(inode->i_extracted_file);
244                         inode->i_extracted_file = STRDUP(output_path);
245                         if (!inode->i_extracted_file) {
246                                 ERROR("Failed to allocate memory for filename");
247                                 return WIMLIB_ERR_NOMEM;
248                         }
249                 }
250         }
251
252         /* Extract the contents of the file to @output_path. */
253
254         out_fd = open(output_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
255         if (out_fd == -1) {
256                 ERROR_WITH_ERRNO("Failed to open the file `%s' for writing",
257                                  output_path);
258                 return WIMLIB_ERR_OPEN;
259         }
260
261         if (!lte) {
262                 /* Empty file with no lookup table entry */
263                 DEBUG("Empty file `%s'.", output_path);
264                 ret = 0;
265                 goto out_extract_unix_data;
266         }
267
268         ret = extract_wim_resource_to_fd(lte, out_fd, wim_resource_size(lte));
269         if (ret) {
270                 ERROR("Failed to extract resource to `%s'", output_path);
271                 goto out;
272         }
273
274 out_extract_unix_data:
275         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
276                 struct wimlib_unix_data unix_data;
277                 ret = inode_get_unix_data(inode, &unix_data, NULL);
278                 if (ret > 0)
279                         ;
280                 else if (ret < 0)
281                         ret = 0;
282                 else
283                         ret = fd_apply_unix_data(out_fd, &unix_data);
284                 if (ret)
285                         goto out;
286         }
287         if (lte)
288                 args->progress.extract.completed_bytes += wim_resource_size(lte);
289 out:
290         if (close(out_fd) != 0) {
291                 ERROR_WITH_ERRNO("Failed to close file `%s'", output_path);
292                 if (ret == 0)
293                         ret = WIMLIB_ERR_WRITE;
294         }
295         return ret;
296 }
297
298 static int
299 extract_regular_file(struct wim_dentry *dentry,
300                      struct apply_args *args,
301                      const char *output_path)
302 {
303         struct wim_lookup_table_entry *lte;
304         const struct wim_inode *inode = dentry->d_inode;
305
306         lte = inode_unnamed_lte_resolved(inode);
307
308         if (lte && (args->extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
309                                            WIMLIB_EXTRACT_FLAG_HARDLINK)))
310         {
311                 if (lte->extracted_file) {
312                         return extract_regular_file_linked(dentry, output_path, args, lte);
313                 } else {
314                         lte->extracted_file = STRDUP(output_path);
315                         if (!lte->extracted_file)
316                                 return WIMLIB_ERR_NOMEM;
317                 }
318         }
319         return extract_regular_file_unlinked(dentry, args, output_path, lte);
320 }
321
322 static int
323 extract_symlink(struct wim_dentry *dentry,
324                 struct apply_args *args,
325                 const char *output_path)
326 {
327         char target[4096 + args->target_realpath_len];
328         char *fixed_target;
329         const struct wim_inode *inode = dentry->d_inode;
330
331         ssize_t ret = wim_inode_readlink(inode,
332                                          target + args->target_realpath_len,
333                                          sizeof(target) - args->target_realpath_len - 1);
334         struct wim_lookup_table_entry *lte;
335
336         if (ret <= 0) {
337                 ERROR("Could not read the symbolic link from dentry `%s'",
338                       dentry->_full_path);
339                 return WIMLIB_ERR_INVALID_DENTRY;
340         }
341         target[args->target_realpath_len + ret] = '\0';
342         if (target[args->target_realpath_len] == '/' &&
343             args->extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX)
344         {
345                 /* Fix absolute symbolic link target to point into the actual
346                  * extraction destination */
347                 memcpy(target, args->target_realpath,
348                        args->target_realpath_len);
349                 fixed_target = target;
350         } else {
351                 /* Keep same link target */
352                 fixed_target = target + args->target_realpath_len;
353         }
354         ret = symlink(fixed_target, output_path);
355         if (ret) {
356                 ERROR_WITH_ERRNO("Failed to symlink `%s' to `%s'",
357                                  output_path, fixed_target);
358                 return WIMLIB_ERR_LINK;
359         }
360         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
361                 struct wimlib_unix_data unix_data;
362                 ret = inode_get_unix_data(inode, &unix_data, NULL);
363                 if (ret > 0)
364                         ;
365                 else if (ret < 0)
366                         ret = 0;
367                 else
368                         ret = symlink_apply_unix_data(output_path, &unix_data);
369                 if (ret)
370                         return ret;
371         }
372         lte = inode_unnamed_lte_resolved(inode);
373         wimlib_assert(lte != NULL);
374         args->progress.extract.completed_bytes += wim_resource_size(lte);
375         return 0;
376 }
377
378 #endif /* !__WIN32__ */
379
380 static int
381 extract_directory(struct wim_dentry *dentry,
382                   const tchar *output_path, bool is_root)
383 {
384         int ret;
385         struct stat stbuf;
386
387         ret = tstat(output_path, &stbuf);
388         if (ret == 0) {
389                 if (S_ISDIR(stbuf.st_mode)) {
390                         /*if (!is_root)*/
391                                 /*WARNING("`%s' already exists", output_path);*/
392                         goto dir_exists;
393                 } else {
394                         ERROR("`%"TS"' is not a directory", output_path);
395                         return WIMLIB_ERR_MKDIR;
396                 }
397         } else {
398                 if (errno != ENOENT) {
399                         ERROR_WITH_ERRNO("Failed to stat `%"TS"'", output_path);
400                         return WIMLIB_ERR_STAT;
401                 }
402         }
403
404         if (tmkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH))
405         {
406                 ERROR_WITH_ERRNO("Cannot create directory `%"TS"'", output_path);
407                 return WIMLIB_ERR_MKDIR;
408         }
409 dir_exists:
410         ret = 0;
411 #ifndef __WIN32__
412         if (dentry) {
413                 struct wimlib_unix_data unix_data;
414                 ret = inode_get_unix_data(dentry->d_inode, &unix_data, NULL);
415                 if (ret > 0)
416                         ;
417                 else if (ret < 0)
418                         ret = 0;
419                 else
420                         ret = dir_apply_unix_data(output_path, &unix_data);
421         }
422 #endif
423         return ret;
424 }
425
426 #ifndef __WIN32__
427 static int
428 unix_do_apply_dentry(const char *output_path, size_t output_path_len,
429                      struct wim_dentry *dentry, struct apply_args *args)
430 {
431         const struct wim_inode *inode = dentry->d_inode;
432
433         if (inode_is_symlink(inode))
434                 return extract_symlink(dentry, args, output_path);
435         else if (inode_is_directory(inode))
436                 return extract_directory((args->extract_flags &
437                                            WIMLIB_EXTRACT_FLAG_UNIX_DATA) ? dentry : NULL,
438                                          output_path, false);
439         else
440                 return extract_regular_file(dentry, args, output_path);
441 }
442
443 static int
444 unix_do_apply_dentry_timestamps(const char *output_path,
445                                 size_t output_path_len,
446                                 const struct wim_dentry *dentry,
447                                 struct apply_args *args)
448 {
449         int ret;
450         const struct wim_inode *inode = dentry->d_inode;
451
452 #ifdef HAVE_UTIMENSAT
453         /* Convert the WIM timestamps, which are accurate to 100 nanoseconds,
454          * into `struct timespec's for passing to utimensat(), which is accurate
455          * to 1 nanosecond. */
456
457         struct timespec ts[2];
458         ts[0] = wim_timestamp_to_timespec(inode->i_last_access_time);
459         ts[1] = wim_timestamp_to_timespec(inode->i_last_write_time);
460         ret = utimensat(AT_FDCWD, output_path, ts, AT_SYMLINK_NOFOLLOW);
461         if (ret)
462                 ret = errno;
463 #else
464         ret = ENOSYS;
465 #endif
466
467         if (ret == ENOSYS) {
468                 /* utimensat() not implemented or not available */
469         #ifdef HAVE_LUTIMES
470                 /* Convert the WIM timestamps, which are accurate to 100
471                  * nanoseconds, into `struct timeval's for passing to lutimes(),
472                  * which is accurate to 1 microsecond. */
473                 struct timeval tv[2];
474                 tv[0] = wim_timestamp_to_timeval(inode->i_last_access_time);
475                 tv[1] = wim_timestamp_to_timeval(inode->i_last_write_time);
476                 ret = lutimes(output_path, tv);
477                 if (ret)
478                         ret = errno;
479         #endif
480         }
481
482         if (ret == ENOSYS) {
483                 /* utimensat() and lutimes() both not implemented or not
484                  * available */
485         #ifdef HAVE_UTIME
486                 /* Convert the WIM timestamps, which are accurate to 100
487                  * nanoseconds, into a `struct utimbuf's for passing to
488                  * utime(), which is accurate to 1 second. */
489                 struct utimbuf buf;
490                 buf.actime = wim_timestamp_to_unix(inode->i_last_access_time);
491                 buf.modtime = wim_timestamp_to_unix(inode->i_last_write_time);
492                 ret = utime(output_path, &buf);
493         #endif
494         }
495         if (ret && args->num_utime_warnings < 10) {
496                 WARNING_WITH_ERRNO("Failed to set timestamp on file `%s'",
497                                     output_path);
498                 args->num_utime_warnings++;
499         }
500         return 0;
501 }
502 #endif /* !__WIN32__ */
503
504 /* Extracts a file, directory, or symbolic link from the WIM archive. */
505 static int
506 apply_dentry_normal(struct wim_dentry *dentry, void *arg)
507 {
508         struct apply_args *args = arg;
509         size_t len;
510         tchar *output_path;
511
512         len = tstrlen(args->target);
513         if (dentry_is_root(dentry)) {
514                 output_path = (tchar*)args->target;
515         } else {
516                 output_path = alloca(len * sizeof(tchar) + dentry->full_path_nbytes +
517                                      sizeof(tchar));
518                 memcpy(output_path, args->target, len * sizeof(tchar));
519                 memcpy(output_path + len, dentry->_full_path, dentry->full_path_nbytes);
520                 len += dentry->full_path_nbytes / sizeof(tchar);
521                 output_path[len] = T('\0');
522         }
523 #ifdef __WIN32__
524         return win32_do_apply_dentry(output_path, len, dentry, args);
525 #else
526         return unix_do_apply_dentry(output_path, len, dentry, args);
527 #endif
528 }
529
530
531 /* Apply timestamps to an extracted file or directory */
532 static int
533 apply_dentry_timestamps_normal(struct wim_dentry *dentry, void *arg)
534 {
535         struct apply_args *args = arg;
536         size_t len;
537         tchar *output_path;
538
539         len = tstrlen(args->target);
540         if (dentry_is_root(dentry)) {
541                 output_path = (tchar*)args->target;
542         } else {
543                 output_path = alloca(len * sizeof(tchar) + dentry->full_path_nbytes +
544                                      sizeof(tchar));
545                 memcpy(output_path, args->target, len * sizeof(tchar));
546                 memcpy(output_path + len, dentry->_full_path, dentry->full_path_nbytes);
547                 len += dentry->full_path_nbytes / sizeof(tchar);
548                 output_path[len] = T('\0');
549         }
550
551 #ifdef __WIN32__
552         return win32_do_apply_dentry_timestamps(output_path, len, dentry, args);
553 #else
554         return unix_do_apply_dentry_timestamps(output_path, len, dentry, args);
555 #endif
556 }
557
558 /* Extract a dentry if it hasn't already been extracted, and either the dentry
559  * has no streams or WIMLIB_EXTRACT_FLAG_NO_STREAMS is not specified. */
560 static int
561 maybe_apply_dentry(struct wim_dentry *dentry, void *arg)
562 {
563         struct apply_args *args = arg;
564         int ret;
565
566         if (dentry->is_extracted)
567                 return 0;
568
569         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_NO_STREAMS)
570                 if (inode_unnamed_lte_resolved(dentry->d_inode) &&
571                     !(dentry->d_inode->i_attributes & (FILE_ATTRIBUTE_DIRECTORY |
572                                                        FILE_ATTRIBUTE_ENCRYPTED)))
573                         return 0;
574
575         if ((args->extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) &&
576              args->progress_func) {
577                 args->progress.extract.cur_path = dentry->_full_path;
578                 args->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY,
579                                     &args->progress);
580         }
581         ret = args->apply_dentry(dentry, args);
582         if (ret == 0)
583                 dentry->is_extracted = 1;
584         return ret;
585 }
586
587 static void
588 calculate_bytes_to_extract(struct list_head *stream_list,
589                            int extract_flags,
590                            union wimlib_progress_info *progress)
591 {
592         struct wim_lookup_table_entry *lte;
593         u64 total_bytes = 0;
594         u64 num_streams = 0;
595
596         /* For each stream to be extracted... */
597         list_for_each_entry(lte, stream_list, extraction_list) {
598                 if (extract_flags &
599                     (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
600                 {
601                         /* In the symlink or hard link extraction mode, each
602                          * stream will be extracted one time regardless of how
603                          * many dentries share the stream. */
604                         wimlib_assert(!(extract_flags & WIMLIB_EXTRACT_FLAG_NTFS));
605                         if (!lte->extracted_file) {
606                                 num_streams++;
607                                 total_bytes += wim_resource_size(lte);
608                         }
609                 } else {
610                         num_streams += lte->out_refcnt;
611                         total_bytes += lte->out_refcnt * wim_resource_size(lte);
612                 }
613         }
614         progress->extract.num_streams = num_streams;
615         progress->extract.total_bytes = total_bytes;
616         progress->extract.completed_bytes = 0;
617 }
618
619 static void
620 maybe_add_stream_for_extraction(struct wim_lookup_table_entry *lte,
621                                 struct list_head *stream_list)
622 {
623         if (++lte->out_refcnt == 1) {
624                 INIT_LIST_HEAD(&lte->inode_list);
625                 list_add_tail(&lte->extraction_list, stream_list);
626         }
627 }
628
629 static void
630 inode_find_streams_for_extraction(struct wim_inode *inode,
631                                   struct list_head *stream_list,
632                                   int extract_flags)
633 {
634         struct wim_lookup_table_entry *lte;
635         bool inode_added = false;
636
637         lte = inode_unnamed_lte_resolved(inode);
638         if (lte) {
639                 maybe_add_stream_for_extraction(lte, stream_list);
640                 list_add_tail(&inode->i_lte_inode_list, &lte->inode_list);
641                 inode_added = true;
642         }
643
644         /* Determine whether to include alternate data stream entries or not.
645          *
646          * UNIX:  Include them if extracting using NTFS-3g.
647          *
648          * Windows: Include them undconditionally, although if the filesystem is
649          * not NTFS we won't actually be able to extract them. */
650 #if defined(WITH_NTFS_3G)
651         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS)
652 #elif defined(__WIN32__)
653         if (1)
654 #else
655         if (0)
656 #endif
657         {
658                 for (unsigned i = 0; i < inode->i_num_ads; i++) {
659                         if (inode->i_ads_entries[i].stream_name_nbytes != 0) {
660                                 lte = inode->i_ads_entries[i].lte;
661                                 if (lte) {
662                                         maybe_add_stream_for_extraction(lte,
663                                                                         stream_list);
664                                         if (!inode_added) {
665                                                 list_add_tail(&inode->i_lte_inode_list,
666                                                               &lte->inode_list);
667                                                 inode_added = true;
668                                         }
669                                 }
670                         }
671                 }
672         }
673 }
674
675 static void
676 find_streams_for_extraction(struct wim_image_metadata *imd,
677                             struct list_head *stream_list,
678                             struct wim_lookup_table *lookup_table,
679                             int extract_flags)
680 {
681         struct wim_inode *inode;
682         struct wim_dentry *dentry;
683
684         for_lookup_table_entry(lookup_table, lte_zero_out_refcnt, NULL);
685         INIT_LIST_HEAD(stream_list);
686         image_for_each_inode(inode, imd) {
687                 if (!inode->i_resolved)
688                         inode_resolve_ltes(inode, lookup_table);
689                 inode_for_each_dentry(dentry, inode)
690                         dentry->is_extracted = 0;
691                 inode_find_streams_for_extraction(inode, stream_list,
692                                                   extract_flags);
693         }
694 }
695
/*
 * Callbacks that implement one extraction backend.  Each takes the dentry to
 * process and an opaque argument, and returns an int status.
 */
struct apply_operations {
        /* Extracts one dentry */
        int (*apply_dentry)(struct wim_dentry *dentry,
                            void *arg);

        /* Applies timestamps to one dentry */
        int (*apply_dentry_timestamps)(struct wim_dentry *dentry,
                                       void *arg);
};
700
701 static const struct apply_operations normal_apply_operations = {
702         .apply_dentry = apply_dentry_normal,
703         .apply_dentry_timestamps = apply_dentry_timestamps_normal,
704 };
705
706 #ifdef WITH_NTFS_3G
707 static const struct apply_operations ntfs_apply_operations = {
708         .apply_dentry = apply_dentry_ntfs,
709         .apply_dentry_timestamps = apply_dentry_timestamps_ntfs,
710 };
711 #endif
712
713 static int
714 apply_stream_list(struct list_head *stream_list,
715                   struct apply_args *args,
716                   const struct apply_operations *ops,
717                   wimlib_progress_func_t progress_func)
718 {
719         uint64_t bytes_per_progress = args->progress.extract.total_bytes / 100;
720         uint64_t next_progress = bytes_per_progress;
721         struct wim_lookup_table_entry *lte;
722         struct wim_inode *inode;
723         struct wim_dentry *dentry;
724         int ret;
725
726         /* This complicated loop is essentially looping through the dentries,
727          * although dentries may be visited more than once (if a dentry contains
728          * two different nonempty streams) or not at all (if a dentry contains
729          * no non-empty streams).
730          *
731          * The outer loop is over the distinct streams to be extracted so that
732          * sequential reading of the WIM can be implemented. */
733
734         /* For each distinct stream to be extracted */
735         list_for_each_entry(lte, stream_list, extraction_list) {
736                 /* For each inode that contains the stream */
737                 list_for_each_entry(inode, &lte->inode_list, i_lte_inode_list) {
738                         /* For each dentry that points to the inode */
739                         inode_for_each_dentry(dentry, inode) {
740                                 /* Extract the dentry if it was not already
741                                  * extracted */
742                                 ret = maybe_apply_dentry(dentry, args);
743                                 if (ret)
744                                         return ret;
745                                 if (progress_func &&
746                                     args->progress.extract.completed_bytes >= next_progress)
747                                 {
748                                         progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS,
749                                                       &args->progress);
750                                         if (args->progress.extract.completed_bytes >=
751                                             args->progress.extract.total_bytes)
752                                         {
753                                                 next_progress = ~0ULL;
754                                         } else {
755                                                 next_progress =
756                                                         min (args->progress.extract.completed_bytes +
757                                                              bytes_per_progress,
758                                                              args->progress.extract.total_bytes);
759                                         }
760                                 }
761                         }
762                 }
763         }
764         return 0;
765 }
766
767 static int
768 sort_stream_list_by_wim_position(struct list_head *stream_list)
769 {
770         struct list_head *cur;
771         size_t num_streams;
772         struct wim_lookup_table_entry **array;
773         size_t i;
774         size_t array_size;
775
776         num_streams = 0;
777         list_for_each(cur, stream_list)
778                 num_streams++;
779         array_size = num_streams * sizeof(array[0]);
780         array = MALLOC(array_size);
781         if (!array) {
782                 ERROR("Failed to allocate %zu bytes to sort stream entries",
783                       array_size);
784                 return WIMLIB_ERR_NOMEM;
785         }
786         cur = stream_list->next;
787         for (i = 0; i < num_streams; i++) {
788                 array[i] = container_of(cur, struct wim_lookup_table_entry, extraction_list);
789                 cur = cur->next;
790         }
791
792         qsort(array, num_streams, sizeof(array[0]), cmp_streams_by_wim_position);
793
794         INIT_LIST_HEAD(stream_list);
795         for (i = 0; i < num_streams; i++)
796                 list_add_tail(&array[i]->extraction_list, stream_list);
797         FREE(array);
798         return 0;
799 }
800
801
802 /* Extracts the image @image from the WIM @w to the directory or NTFS volume
803  * @target. */
804 static int
805 extract_single_image(WIMStruct *w, int image,
806                      const tchar *target, int extract_flags,
807                      wimlib_progress_func_t progress_func)
808 {
809         int ret;
810         struct list_head stream_list;
811
812         struct apply_args args;
813         const struct apply_operations *ops;
814
815         memset(&args, 0, sizeof(args));
816
817         args.w                  = w;
818         args.target             = target;
819         args.extract_flags      = extract_flags;
820         args.progress_func      = progress_func;
821
822         if (progress_func) {
823                 args.progress.extract.wimfile_name = w->filename;
824                 args.progress.extract.image = image;
825                 args.progress.extract.extract_flags = (extract_flags &
826                                                        WIMLIB_EXTRACT_MASK_PUBLIC);
827                 args.progress.extract.image_name = wimlib_get_image_name(w, image);
828                 args.progress.extract.target = target;
829         }
830
831 #ifdef WITH_NTFS_3G
832         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
833                 args.vol = ntfs_mount(target, 0);
834                 if (!args.vol) {
835                         ERROR_WITH_ERRNO("Failed to mount NTFS volume `%"TS"'",
836                                          target);
837                         return WIMLIB_ERR_NTFS_3G;
838                 }
839                 ops = &ntfs_apply_operations;
840         } else
841 #endif
842                 ops = &normal_apply_operations;
843
844         ret = select_wim_image(w, image);
845         if (ret)
846                 goto out;
847
848         /* Build a list of the streams that need to be extracted */
849         find_streams_for_extraction(wim_get_current_image_metadata(w),
850                                     &stream_list,
851                                     w->lookup_table, extract_flags);
852
853         /* Calculate the number of bytes of data that will be extracted */
854         calculate_bytes_to_extract(&stream_list, extract_flags,
855                                    &args.progress);
856
857         if (progress_func) {
858                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN,
859                               &args.progress);
860         }
861
862         /* If a sequential extraction was specified, sort the streams to be
863          * extracted by their position in the WIM file, so that the WIM file can
864          * be read sequentially. */
865         if (extract_flags & WIMLIB_EXTRACT_FLAG_SEQUENTIAL) {
866                 ret = sort_stream_list_by_wim_position(&stream_list);
867                 if (ret != 0) {
868                         WARNING("Falling back to non-sequential extraction");
869                         extract_flags &= ~WIMLIB_EXTRACT_FLAG_SEQUENTIAL;
870                 }
871         }
872
873         if (progress_func) {
874                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_BEGIN,
875                               &args.progress);
876         }
877
878         ret = calculate_dentry_tree_full_paths(wim_root_dentry(w));
879         if (ret)
880                 goto out;
881
882         /* Make the directory structure and extract empty files */
883         args.extract_flags |= WIMLIB_EXTRACT_FLAG_NO_STREAMS;
884         args.apply_dentry = ops->apply_dentry;
885         ret = for_dentry_in_tree(wim_root_dentry(w), maybe_apply_dentry, &args);
886         args.extract_flags &= ~WIMLIB_EXTRACT_FLAG_NO_STREAMS;
887         if (ret)
888                 goto out;
889
890         if (progress_func) {
891                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_END,
892                               &args.progress);
893         }
894
895         if (extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX) {
896                 args.target_realpath = realpath(target, NULL);
897                 if (!args.target_realpath)
898                         return WIMLIB_ERR_NOMEM;
899                 args.target_realpath_len = tstrlen(args.target_realpath);
900         }
901
902         /* Extract non-empty files */
903         ret = apply_stream_list(&stream_list, &args, ops, progress_func);
904         if (ret)
905                 goto out_free_target_realpath;
906
907         if (progress_func) {
908                 progress_func(WIMLIB_PROGRESS_MSG_APPLY_TIMESTAMPS,
909                               &args.progress);
910         }
911
912         /* Apply timestamps */
913         ret = for_dentry_in_tree_depth(wim_root_dentry(w),
914                                        ops->apply_dentry_timestamps, &args);
915         if (ret)
916                 goto out_free_target_realpath;
917
918         if (progress_func) {
919                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END,
920                               &args.progress);
921         }
922 out_free_target_realpath:
923         FREE(args.target_realpath);
924 out:
925 #ifdef WITH_NTFS_3G
926         /* Unmount the NTFS volume */
927         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
928                 if (ntfs_umount(args.vol, FALSE) != 0) {
929                         ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%"TS"'",
930                                          args.target);
931                         if (ret == 0)
932                                 ret = WIMLIB_ERR_NTFS_3G;
933                 }
934         }
935 #endif
936         return ret;
937 }
938
939 static const tchar * const filename_forbidden_chars =
940 T(
941 #ifdef __WIN32__
942 "<>:\"/\\|?*"
943 #else
944 "/"
945 #endif
946 );
947
948 /* This function checks if it is okay to use a WIM image's name as a directory
949  * name.  */
950 static bool
951 image_name_ok_as_dir(const tchar *image_name)
952 {
953         return image_name && *image_name &&
954                 !tstrpbrk(image_name, filename_forbidden_chars);
955 }
956
957 /* Extracts all images from the WIM to the directory @target, with the images
958  * placed in subdirectories named by their image names. */
959 static int
960 extract_all_images(WIMStruct *w,
961                    const tchar *target,
962                    int extract_flags,
963                    wimlib_progress_func_t progress_func)
964 {
965         size_t image_name_max_len = max(xml_get_max_image_name_len(w), 20);
966         size_t output_path_len = tstrlen(target);
967         tchar buf[output_path_len + 1 + image_name_max_len + 1];
968         int ret;
969         int image;
970         const tchar *image_name;
971
972         ret = extract_directory(NULL, target, true);
973         if (ret)
974                 return ret;
975
976         tmemcpy(buf, target, output_path_len);
977         buf[output_path_len] = T('/');
978         for (image = 1; image <= w->hdr.image_count; image++) {
979                 image_name = wimlib_get_image_name(w, image);
980                 if (image_name_ok_as_dir(image_name)) {
981                         tstrcpy(buf + output_path_len + 1, image_name);
982                 } else {
983                         /* Image name is empty, or contains forbidden
984                          * characters. */
985                         tsprintf(buf + output_path_len + 1, T("%d"), image);
986                 }
987                 ret = extract_single_image(w, image, buf, extract_flags,
988                                            progress_func);
989                 if (ret != 0)
990                         return ret;
991         }
992         return 0;
993 }
994
995 /* Extracts a single image or all images from a WIM file to a directory or NTFS
996  * volume. */
997 WIMLIBAPI int
998 wimlib_extract_image(WIMStruct *w,
999                      int image,
1000                      const tchar *target,
1001                      int extract_flags,
1002                      WIMStruct **additional_swms,
1003                      unsigned num_additional_swms,
1004                      wimlib_progress_func_t progress_func)
1005 {
1006         struct wim_lookup_table *joined_tab, *w_tab_save;
1007         int ret;
1008
1009         if (!target)
1010                 return WIMLIB_ERR_INVALID_PARAM;
1011
1012         extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC;
1013
1014         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
1015                         == (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
1016                 return WIMLIB_ERR_INVALID_PARAM;
1017
1018 #ifdef __WIN32__
1019         if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
1020                 ERROR("Extracting UNIX data is not supported on Windows");
1021                 return WIMLIB_ERR_INVALID_PARAM;
1022         }
1023         if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK)) {
1024                 ERROR("Linked extraction modes are not supported on Windows");
1025                 return WIMLIB_ERR_INVALID_PARAM;
1026         }
1027 #endif
1028
1029         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
1030 #ifdef WITH_NTFS_3G
1031                 if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))) {
1032                         ERROR("Cannot specify symlink or hardlink flags when applying\n"
1033                               "        directly to a NTFS volume");
1034                         return WIMLIB_ERR_INVALID_PARAM;
1035                 }
1036                 if (image == WIMLIB_ALL_IMAGES) {
1037                         ERROR("Can only apply a single image when applying "
1038                               "directly to a NTFS volume");
1039                         return WIMLIB_ERR_INVALID_PARAM;
1040                 }
1041                 if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
1042                         ERROR("Cannot restore UNIX-specific data in the NTFS extraction mode");
1043                         return WIMLIB_ERR_INVALID_PARAM;
1044                 }
1045 #else
1046                 ERROR("wimlib was compiled without support for NTFS-3g, so");
1047                 ERROR("we cannot apply a WIM image directly to a NTFS volume");
1048                 return WIMLIB_ERR_UNSUPPORTED;
1049 #endif
1050         }
1051
1052         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
1053                               WIMLIB_EXTRACT_FLAG_RPFIX)) ==
1054                 (WIMLIB_EXTRACT_FLAG_RPFIX | WIMLIB_EXTRACT_FLAG_NORPFIX))
1055         {
1056                 ERROR("Cannot specify RPFIX and NORPFIX flags at the same time!");
1057                 return WIMLIB_ERR_INVALID_PARAM;
1058         }
1059
1060         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
1061                               WIMLIB_EXTRACT_FLAG_NORPFIX)) == 0)
1062                 if (w->hdr.flags & WIM_HDR_FLAG_RP_FIX)
1063                         extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX;
1064
1065         ret = verify_swm_set(w, additional_swms, num_additional_swms);
1066         if (ret)
1067                 return ret;
1068
1069         ret = wim_checksum_unhashed_streams(w);
1070         if (ret)
1071                 return ret;
1072
1073         if (num_additional_swms) {
1074                 ret = new_joined_lookup_table(w, additional_swms,
1075                                               num_additional_swms, &joined_tab);
1076                 if (ret)
1077                         return ret;
1078                 w_tab_save = w->lookup_table;
1079                 w->lookup_table = joined_tab;
1080         }
1081
1082         if (image == WIMLIB_ALL_IMAGES) {
1083                 extract_flags |= WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
1084                 ret = extract_all_images(w, target, extract_flags,
1085                                          progress_func);
1086         } else {
1087                 extract_flags &= ~WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
1088                 ret = extract_single_image(w, image, target, extract_flags,
1089                                            progress_func);
1090         }
1091
1092         if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
1093                              WIMLIB_EXTRACT_FLAG_HARDLINK))
1094         {
1095                 for_lookup_table_entry(w->lookup_table,
1096                                        lte_free_extracted_file,
1097                                        NULL);
1098         }
1099
1100         if (num_additional_swms) {
1101                 free_lookup_table(w->lookup_table);
1102                 w->lookup_table = w_tab_save;
1103         }
1104         return ret;
1105 }