]> wimlib.net Git - wimlib/blob - src/extract_image.c
cd23e45d4b4cf050da22bb1ae1ca431f7c761255
[wimlib] / src / extract_image.c
1 /*
2  * extract_image.c
3  *
4  * Support for extracting WIM files.
5  */
6
7 /*
8  * Copyright (C) 2012, 2013 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free
14  * Software Foundation; either version 3 of the License, or (at your option)
15  * any later version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more
20  * details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with wimlib; if not, see http://www.gnu.org/licenses/.
24  */
25
26 #include "config.h"
27
28 #include <dirent.h>
29
30 #ifdef __WIN32__
31 #  include "win32.h"
32 #else
33 #  ifdef HAVE_UTIME_H
34 #    include <utime.h>
35 #  endif
36 #  include "timestamp.h"
37 #  include <sys/time.h>
38 #endif
39
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <sys/stat.h>
45 #include <unistd.h>
46
47 #include "dentry.h"
48 #include "lookup_table.h"
49 #include "wimlib_internal.h"
50 #include "xml.h"
51
52 #ifdef WITH_NTFS_3G
53 #  include <ntfs-3g/volume.h>
54 #endif
55
56 #ifdef HAVE_ALLOCA_H
57 #  include <alloca.h>
58 #endif
59
60 #ifndef __WIN32__
61
/*
 * Count the components of @path.
 *
 * A component is a maximal run of characters other than '/'.  Leading,
 * trailing and repeated slashes therefore contribute nothing to the count;
 * "" and "/" both have zero components.
 */
static unsigned
get_num_path_components(const char *path)
{
        unsigned count = 0;
        int inside_component = 0;

        for (; *path != '\0'; path++) {
                if (*path == '/') {
                        /* A slash always terminates the current component. */
                        inside_component = 0;
                } else if (!inside_component) {
                        /* First character of a new component. */
                        inside_component = 1;
                        count++;
                }
        }
        return count;
}
77
/*
 * Advance past the current component of @path.
 *
 * Skips every character up to the next '/', then skips that run of slashes.
 * Returns a pointer to the start of the following component, or to the
 * terminating null character if there is none.
 */
static const char *
path_next_part(const char *path)
{
        path += strcspn(path, "/");     /* rest of the current component */
        path += strspn(path, "/");      /* separator(s) following it */
        return path;
}
87
88 static int
89 extract_regular_file_linked(struct wim_dentry *dentry,
90                             const char *output_path,
91                             struct apply_args *args,
92                             struct wim_lookup_table_entry *lte)
93 {
94         /* This mode overrides the normal hard-link extraction and
95          * instead either symlinks or hardlinks *all* identical files in
96          * the WIM, even if they are in a different image (in the case
97          * of a multi-image extraction) */
98
99         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_HARDLINK) {
100                 if (link(lte->extracted_file, output_path) != 0) {
101                         ERROR_WITH_ERRNO("Failed to hard link "
102                                          "`%s' to `%s'",
103                                          output_path, lte->extracted_file);
104                         return WIMLIB_ERR_LINK;
105                 }
106         } else {
107                 int num_path_components;
108                 int num_output_dir_path_components;
109                 size_t extracted_file_len;
110                 char *p;
111                 const char *p2;
112                 size_t i;
113
114                 num_path_components = get_num_path_components(dentry->_full_path) - 1;
115                 num_output_dir_path_components = get_num_path_components(args->target);
116
117                 if (args->extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE) {
118                         num_path_components++;
119                         num_output_dir_path_components--;
120                 }
121                 extracted_file_len = strlen(lte->extracted_file);
122
123                 char buf[extracted_file_len + 3 * num_path_components + 1];
124                 p = &buf[0];
125
126                 for (i = 0; i < num_path_components; i++) {
127                         *p++ = '.';
128                         *p++ = '.';
129                         *p++ = '/';
130                 }
131                 p2 = lte->extracted_file;
132                 while (*p2 == '/')
133                         p2++;
134                 while (num_output_dir_path_components > 0) {
135                         p2 = path_next_part(p2);
136                         num_output_dir_path_components--;
137                 }
138                 strcpy(p, p2);
139                 if (symlink(buf, output_path) != 0) {
140                         ERROR_WITH_ERRNO("Failed to symlink `%s' to `%s'",
141                                          buf, lte->extracted_file);
142                         return WIMLIB_ERR_LINK;
143                 }
144         }
145         return 0;
146 }
147
148 static int
149 symlink_apply_unix_data(const char *link,
150                         const struct wimlib_unix_data *unix_data)
151 {
152         if (lchown(link, unix_data->uid, unix_data->gid)) {
153                 if (errno == EPERM) {
154                         /* Ignore */
155                         WARNING_WITH_ERRNO("failed to set symlink UNIX "
156                                            "owner/group on \"%s\"", link);
157                 } else {
158                         ERROR_WITH_ERRNO("failed to set symlink UNIX "
159                                          "owner/group on \"%s\"", link);
160                         return WIMLIB_ERR_INVALID_DENTRY;
161                 }
162         }
163         return 0;
164 }
165
166 static int
167 fd_apply_unix_data(int fd, const char *path,
168                    const struct wimlib_unix_data *unix_data)
169 {
170         if (fchown(fd, unix_data->uid, unix_data->gid)) {
171                 if (errno == EPERM) {
172                         WARNING_WITH_ERRNO("failed to set file UNIX "
173                                            "owner/group on \"%s\"", path);
174                         /* Ignore? */
175                 } else {
176                         ERROR_WITH_ERRNO("failed to set file UNIX "
177                                          "owner/group on \"%s\"", path);
178                         return WIMLIB_ERR_INVALID_DENTRY;
179                 }
180         }
181
182         if (fchmod(fd, unix_data->mode)) {
183                 if (errno == EPERM) {
184                         WARNING_WITH_ERRNO("failed to set UNIX file mode "
185                                            "on \"%s\"", path);
186                         /* Ignore? */
187                 } else {
188                         ERROR_WITH_ERRNO("failed to set UNIX file mode "
189                                          "on \"%s\"", path);
190                         return WIMLIB_ERR_INVALID_DENTRY;
191                 }
192         }
193         return 0;
194 }
195
196 static int
197 dir_apply_unix_data(const char *dir, const struct wimlib_unix_data *unix_data)
198 {
199         int dfd = open(dir, O_RDONLY);
200         int ret;
201         if (dfd >= 0) {
202                 ret = fd_apply_unix_data(dfd, dir, unix_data);
203                 if (close(dfd)) {
204                         ERROR_WITH_ERRNO("can't close directory `%s'", dir);
205                         ret = WIMLIB_ERR_MKDIR;
206                 }
207         } else {
208                 ERROR_WITH_ERRNO("can't open directory `%s'", dir);
209                 ret = WIMLIB_ERR_MKDIR;
210         }
211         return ret;
212 }
213
214 static int
215 extract_regular_file_unlinked(struct wim_dentry *dentry,
216                               struct apply_args *args,
217                               const char *output_path,
218                               struct wim_lookup_table_entry *lte)
219 {
220         /* Normal mode of extraction.  Regular files and hard links are
221          * extracted in the way that they appear in the WIM. */
222
223         int out_fd;
224         int ret;
225         struct wim_inode *inode = dentry->d_inode;
226
227         if (!((args->extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE)
228                 && (args->extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
229                                      WIMLIB_EXTRACT_FLAG_HARDLINK))))
230         {
231                 /* If the dentry is part of a hard link set of at least 2
232                  * dentries and one of the other dentries has already been
233                  * extracted, make a hard link to the file corresponding to this
234                  * already-extracted directory.  Otherwise, extract the file and
235                  * set the inode->i_extracted_file field so that other dentries
236                  * in the hard link group can link to it. */
237                 if (inode->i_nlink > 1) {
238                         if (inode->i_extracted_file) {
239                                 DEBUG("Extracting hard link `%s' => `%s'",
240                                       output_path, inode->i_extracted_file);
241                                 if (link(inode->i_extracted_file, output_path) != 0) {
242                                         ERROR_WITH_ERRNO("Failed to hard link "
243                                                          "`%s' to `%s'",
244                                                          output_path,
245                                                          inode->i_extracted_file);
246                                         return WIMLIB_ERR_LINK;
247                                 }
248                                 return 0;
249                         }
250                         FREE(inode->i_extracted_file);
251                         inode->i_extracted_file = STRDUP(output_path);
252                         if (!inode->i_extracted_file) {
253                                 ERROR("Failed to allocate memory for filename");
254                                 return WIMLIB_ERR_NOMEM;
255                         }
256                 }
257         }
258
259         /* Extract the contents of the file to @output_path. */
260
261         out_fd = open(output_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
262         if (out_fd == -1) {
263                 ERROR_WITH_ERRNO("Failed to open the file `%s' for writing",
264                                  output_path);
265                 return WIMLIB_ERR_OPEN;
266         }
267
268         if (!lte) {
269                 /* Empty file with no lookup table entry */
270                 DEBUG("Empty file `%s'.", output_path);
271                 ret = 0;
272                 goto out_extract_unix_data;
273         }
274
275         ret = extract_wim_resource_to_fd(lte, out_fd, wim_resource_size(lte));
276         if (ret) {
277                 ERROR("Failed to extract resource to `%s'", output_path);
278                 goto out;
279         }
280
281 out_extract_unix_data:
282         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
283                 struct wimlib_unix_data unix_data;
284                 ret = inode_get_unix_data(inode, &unix_data, NULL);
285                 if (ret > 0)
286                         ;
287                 else if (ret < 0)
288                         ret = 0;
289                 else
290                         ret = fd_apply_unix_data(out_fd, output_path, &unix_data);
291                 if (ret)
292                         goto out;
293         }
294         if (lte)
295                 args->progress.extract.completed_bytes += wim_resource_size(lte);
296 out:
297         if (close(out_fd) != 0) {
298                 ERROR_WITH_ERRNO("Failed to close file `%s'", output_path);
299                 if (ret == 0)
300                         ret = WIMLIB_ERR_WRITE;
301         }
302         return ret;
303 }
304
305 static int
306 extract_regular_file(struct wim_dentry *dentry,
307                      struct apply_args *args,
308                      const char *output_path)
309 {
310         struct wim_lookup_table_entry *lte;
311         const struct wim_inode *inode = dentry->d_inode;
312
313         lte = inode_unnamed_lte_resolved(inode);
314
315         if (lte && (args->extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
316                                            WIMLIB_EXTRACT_FLAG_HARDLINK)))
317         {
318                 if (lte->extracted_file) {
319                         return extract_regular_file_linked(dentry, output_path, args, lte);
320                 } else {
321                         lte->extracted_file = STRDUP(output_path);
322                         if (!lte->extracted_file)
323                                 return WIMLIB_ERR_NOMEM;
324                 }
325         }
326         return extract_regular_file_unlinked(dentry, args, output_path, lte);
327 }
328
329 static int
330 extract_symlink(struct wim_dentry *dentry,
331                 struct apply_args *args,
332                 const char *output_path)
333 {
334         char target[4096 + args->target_realpath_len];
335         char *fixed_target;
336         const struct wim_inode *inode = dentry->d_inode;
337
338         ssize_t ret = wim_inode_readlink(inode,
339                                          target + args->target_realpath_len,
340                                          sizeof(target) - args->target_realpath_len - 1);
341         struct wim_lookup_table_entry *lte;
342
343         if (ret <= 0) {
344                 ERROR("Could not read the symbolic link from dentry `%s'",
345                       dentry->_full_path);
346                 return WIMLIB_ERR_INVALID_DENTRY;
347         }
348         target[args->target_realpath_len + ret] = '\0';
349         if (target[args->target_realpath_len] == '/' &&
350             args->extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX)
351         {
352                 /* Fix absolute symbolic link target to point into the actual
353                  * extraction destination */
354                 memcpy(target, args->target_realpath,
355                        args->target_realpath_len);
356                 fixed_target = target;
357         } else {
358                 /* Keep same link target */
359                 fixed_target = target + args->target_realpath_len;
360         }
361         ret = symlink(fixed_target, output_path);
362         if (ret) {
363                 ERROR_WITH_ERRNO("Failed to symlink `%s' to `%s'",
364                                  output_path, fixed_target);
365                 return WIMLIB_ERR_LINK;
366         }
367         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
368                 struct wimlib_unix_data unix_data;
369                 ret = inode_get_unix_data(inode, &unix_data, NULL);
370                 if (ret > 0)
371                         ;
372                 else if (ret < 0)
373                         ret = 0;
374                 else
375                         ret = symlink_apply_unix_data(output_path, &unix_data);
376                 if (ret)
377                         return ret;
378         }
379         lte = inode_unnamed_lte_resolved(inode);
380         wimlib_assert(lte != NULL);
381         args->progress.extract.completed_bytes += wim_resource_size(lte);
382         return 0;
383 }
384
385 #endif /* !__WIN32__ */
386
387 static int
388 extract_directory(struct wim_dentry *dentry,
389                   const tchar *output_path, bool is_root)
390 {
391         int ret;
392         struct stat stbuf;
393
394         ret = tstat(output_path, &stbuf);
395         if (ret == 0) {
396                 if (S_ISDIR(stbuf.st_mode)) {
397                         /*if (!is_root)*/
398                                 /*WARNING("`%s' already exists", output_path);*/
399                         goto dir_exists;
400                 } else {
401                         ERROR("`%"TS"' is not a directory", output_path);
402                         return WIMLIB_ERR_MKDIR;
403                 }
404         } else {
405                 if (errno != ENOENT) {
406                         ERROR_WITH_ERRNO("Failed to stat `%"TS"'", output_path);
407                         return WIMLIB_ERR_STAT;
408                 }
409         }
410
411         if (tmkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH))
412         {
413                 ERROR_WITH_ERRNO("Cannot create directory `%"TS"'", output_path);
414                 return WIMLIB_ERR_MKDIR;
415         }
416 dir_exists:
417         ret = 0;
418 #ifndef __WIN32__
419         if (dentry) {
420                 struct wimlib_unix_data unix_data;
421                 ret = inode_get_unix_data(dentry->d_inode, &unix_data, NULL);
422                 if (ret > 0)
423                         ;
424                 else if (ret < 0)
425                         ret = 0;
426                 else
427                         ret = dir_apply_unix_data(output_path, &unix_data);
428         }
429 #endif
430         return ret;
431 }
432
433 #ifndef __WIN32__
434 static int
435 unix_do_apply_dentry(const char *output_path, size_t output_path_len,
436                      struct wim_dentry *dentry, struct apply_args *args)
437 {
438         const struct wim_inode *inode = dentry->d_inode;
439
440         if (inode_is_symlink(inode))
441                 return extract_symlink(dentry, args, output_path);
442         else if (inode_is_directory(inode))
443                 return extract_directory((args->extract_flags &
444                                            WIMLIB_EXTRACT_FLAG_UNIX_DATA) ? dentry : NULL,
445                                          output_path, false);
446         else
447                 return extract_regular_file(dentry, args, output_path);
448 }
449
450 static int
451 unix_do_apply_dentry_timestamps(const char *output_path,
452                                 size_t output_path_len,
453                                 const struct wim_dentry *dentry,
454                                 struct apply_args *args)
455 {
456         int ret;
457         const struct wim_inode *inode = dentry->d_inode;
458
459 #ifdef HAVE_UTIMENSAT
460         /* Convert the WIM timestamps, which are accurate to 100 nanoseconds,
461          * into `struct timespec's for passing to utimensat(), which is accurate
462          * to 1 nanosecond. */
463
464         struct timespec ts[2];
465         ts[0] = wim_timestamp_to_timespec(inode->i_last_access_time);
466         ts[1] = wim_timestamp_to_timespec(inode->i_last_write_time);
467         ret = utimensat(AT_FDCWD, output_path, ts, AT_SYMLINK_NOFOLLOW);
468         if (ret)
469                 ret = errno;
470 #else
471         ret = ENOSYS;
472 #endif
473
474         if (ret == ENOSYS) {
475                 /* utimensat() not implemented or not available */
476         #ifdef HAVE_LUTIMES
477                 /* Convert the WIM timestamps, which are accurate to 100
478                  * nanoseconds, into `struct timeval's for passing to lutimes(),
479                  * which is accurate to 1 microsecond. */
480                 struct timeval tv[2];
481                 tv[0] = wim_timestamp_to_timeval(inode->i_last_access_time);
482                 tv[1] = wim_timestamp_to_timeval(inode->i_last_write_time);
483                 ret = lutimes(output_path, tv);
484                 if (ret)
485                         ret = errno;
486         #endif
487         }
488
489         if (ret == ENOSYS) {
490                 /* utimensat() and lutimes() both not implemented or not
491                  * available */
492         #ifdef HAVE_UTIME
493                 /* Convert the WIM timestamps, which are accurate to 100
494                  * nanoseconds, into a `struct utimbuf's for passing to
495                  * utime(), which is accurate to 1 second. */
496                 struct utimbuf buf;
497                 buf.actime = wim_timestamp_to_unix(inode->i_last_access_time);
498                 buf.modtime = wim_timestamp_to_unix(inode->i_last_write_time);
499                 ret = utime(output_path, &buf);
500         #endif
501         }
502         if (ret && args->num_utime_warnings < 10) {
503                 WARNING_WITH_ERRNO("Failed to set timestamp on file `%s'",
504                                     output_path);
505                 args->num_utime_warnings++;
506         }
507         return 0;
508 }
509 #endif /* !__WIN32__ */
510
511 /* Extracts a file, directory, or symbolic link from the WIM archive. */
512 static int
513 apply_dentry_normal(struct wim_dentry *dentry, void *arg)
514 {
515         struct apply_args *args = arg;
516         size_t len;
517         tchar *output_path;
518
519         len = tstrlen(args->target);
520         if (dentry_is_root(dentry)) {
521                 output_path = (tchar*)args->target;
522         } else {
523                 output_path = alloca(len * sizeof(tchar) + dentry->full_path_nbytes +
524                                      sizeof(tchar));
525                 memcpy(output_path, args->target, len * sizeof(tchar));
526                 memcpy(output_path + len, dentry->_full_path, dentry->full_path_nbytes);
527                 len += dentry->full_path_nbytes / sizeof(tchar);
528                 output_path[len] = T('\0');
529         }
530 #ifdef __WIN32__
531         return win32_do_apply_dentry(output_path, len, dentry, args);
532 #else
533         return unix_do_apply_dentry(output_path, len, dentry, args);
534 #endif
535 }
536
537
538 /* Apply timestamps to an extracted file or directory */
539 static int
540 apply_dentry_timestamps_normal(struct wim_dentry *dentry, void *arg)
541 {
542         struct apply_args *args = arg;
543         size_t len;
544         tchar *output_path;
545
546         len = tstrlen(args->target);
547         if (dentry_is_root(dentry)) {
548                 output_path = (tchar*)args->target;
549         } else {
550                 output_path = alloca(len * sizeof(tchar) + dentry->full_path_nbytes +
551                                      sizeof(tchar));
552                 memcpy(output_path, args->target, len * sizeof(tchar));
553                 memcpy(output_path + len, dentry->_full_path, dentry->full_path_nbytes);
554                 len += dentry->full_path_nbytes / sizeof(tchar);
555                 output_path[len] = T('\0');
556         }
557
558 #ifdef __WIN32__
559         return win32_do_apply_dentry_timestamps(output_path, len, dentry, args);
560 #else
561         return unix_do_apply_dentry_timestamps(output_path, len, dentry, args);
562 #endif
563 }
564
565 /* Extract a dentry if it hasn't already been extracted and either
566  * WIMLIB_EXTRACT_FLAG_NO_STREAMS is not specified, or the dentry is a directory
567  * and/or has no unnamed stream. */
568 static int
569 maybe_apply_dentry(struct wim_dentry *dentry, void *arg)
570 {
571         struct apply_args *args = arg;
572         int ret;
573
574         if (dentry->is_extracted)
575                 return 0;
576
577         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_NO_STREAMS &&
578             !dentry_is_directory(dentry) &&
579             inode_unnamed_lte_resolved(dentry->d_inode) != NULL)
580                 return 0;
581
582         if ((args->extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) &&
583              args->progress_func) {
584                 args->progress.extract.cur_path = dentry->_full_path;
585                 args->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY,
586                                     &args->progress);
587         }
588         ret = args->apply_dentry(dentry, args);
589         if (ret == 0)
590                 dentry->is_extracted = 1;
591         return ret;
592 }
593
594 static void
595 calculate_bytes_to_extract(struct list_head *stream_list,
596                            int extract_flags,
597                            union wimlib_progress_info *progress)
598 {
599         struct wim_lookup_table_entry *lte;
600         u64 total_bytes = 0;
601         u64 num_streams = 0;
602
603         /* For each stream to be extracted... */
604         list_for_each_entry(lte, stream_list, extraction_list) {
605                 if (extract_flags &
606                     (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
607                 {
608                         /* In the symlink or hard link extraction mode, each
609                          * stream will be extracted one time regardless of how
610                          * many dentries share the stream. */
611                         wimlib_assert(!(extract_flags & WIMLIB_EXTRACT_FLAG_NTFS));
612                         if (!lte->extracted_file) {
613                                 num_streams++;
614                                 total_bytes += wim_resource_size(lte);
615                         }
616                 } else {
617                         num_streams += lte->out_refcnt;
618                         total_bytes += lte->out_refcnt * wim_resource_size(lte);
619                 }
620         }
621         progress->extract.num_streams = num_streams;
622         progress->extract.total_bytes = total_bytes;
623         progress->extract.completed_bytes = 0;
624 }
625
626 static void
627 maybe_add_stream_for_extraction(struct wim_lookup_table_entry *lte,
628                                 struct list_head *stream_list)
629 {
630         if (++lte->out_refcnt == 1) {
631                 INIT_LIST_HEAD(&lte->inode_list);
632                 list_add_tail(&lte->extraction_list, stream_list);
633         }
634 }
635
636 static void
637 inode_find_streams_for_extraction(struct wim_inode *inode,
638                                   struct list_head *stream_list,
639                                   int extract_flags)
640 {
641         struct wim_lookup_table_entry *lte;
642         bool inode_added = false;
643
644         lte = inode_unnamed_lte_resolved(inode);
645         if (lte) {
646                 maybe_add_stream_for_extraction(lte, stream_list);
647                 list_add_tail(&inode->i_lte_inode_list, &lte->inode_list);
648                 inode_added = true;
649         }
650
651         /* Determine whether to include alternate data stream entries or not.
652          *
653          * UNIX:  Include them if extracting using NTFS-3g.
654          *
655          * Windows: Include them undconditionally, although if the filesystem is
656          * not NTFS we won't actually be able to extract them. */
657 #if defined(WITH_NTFS_3G)
658         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS)
659 #elif defined(__WIN32__)
660         if (1)
661 #else
662         if (0)
663 #endif
664         {
665                 for (unsigned i = 0; i < inode->i_num_ads; i++) {
666                         if (inode->i_ads_entries[i].stream_name_nbytes != 0) {
667                                 lte = inode->i_ads_entries[i].lte;
668                                 if (lte) {
669                                         maybe_add_stream_for_extraction(lte,
670                                                                         stream_list);
671                                         if (!inode_added) {
672                                                 list_add_tail(&inode->i_lte_inode_list,
673                                                               &lte->inode_list);
674                                                 inode_added = true;
675                                         }
676                                 }
677                         }
678                 }
679         }
680 }
681
682 static void
683 find_streams_for_extraction(struct wim_image_metadata *imd,
684                             struct list_head *stream_list,
685                             struct wim_lookup_table *lookup_table,
686                             int extract_flags)
687 {
688         struct wim_inode *inode;
689         struct wim_dentry *dentry;
690
691         for_lookup_table_entry(lookup_table, lte_zero_out_refcnt, NULL);
692         INIT_LIST_HEAD(stream_list);
693         image_for_each_inode(inode, imd) {
694                 if (!inode->i_resolved)
695                         inode_resolve_ltes(inode, lookup_table);
696                 inode_for_each_dentry(dentry, inode)
697                         dentry->is_extracted = 0;
698                 inode_find_streams_for_extraction(inode, stream_list,
699                                                   extract_flags);
700         }
701 }
702
/*
 * Pair of callbacks that implement one extraction backend.  Both receive the
 * dentry to process and an opaque argument, and return an int status.
 */
struct apply_operations {
        /* Extracts a dentry. */
        int (*apply_dentry)(struct wim_dentry *dentry, void *arg);

        /* Applies timestamps to a dentry. */
        int (*apply_dentry_timestamps)(struct wim_dentry *dentry, void *arg);
};
707
708 static const struct apply_operations normal_apply_operations = {
709         .apply_dentry = apply_dentry_normal,
710         .apply_dentry_timestamps = apply_dentry_timestamps_normal,
711 };
712
713 #ifdef WITH_NTFS_3G
714 static const struct apply_operations ntfs_apply_operations = {
715         .apply_dentry = apply_dentry_ntfs,
716         .apply_dentry_timestamps = apply_dentry_timestamps_ntfs,
717 };
718 #endif
719
720 static int
721 apply_stream_list(struct list_head *stream_list,
722                   struct apply_args *args,
723                   const struct apply_operations *ops,
724                   wimlib_progress_func_t progress_func)
725 {
726         uint64_t bytes_per_progress = args->progress.extract.total_bytes / 100;
727         uint64_t next_progress = bytes_per_progress;
728         struct wim_lookup_table_entry *lte;
729         struct wim_inode *inode;
730         struct wim_dentry *dentry;
731         int ret;
732
733         /* This complicated loop is essentially looping through the dentries,
734          * although dentries may be visited more than once (if a dentry contains
735          * two different nonempty streams) or not at all (if a dentry contains
736          * no non-empty streams).
737          *
738          * The outer loop is over the distinct streams to be extracted so that
739          * sequential reading of the WIM can be implemented. */
740
741         /* For each distinct stream to be extracted */
742         list_for_each_entry(lte, stream_list, extraction_list) {
743                 /* For each inode that contains the stream */
744                 list_for_each_entry(inode, &lte->inode_list, i_lte_inode_list) {
745                         /* For each dentry that points to the inode */
746                         inode_for_each_dentry(dentry, inode) {
747                                 /* Extract the dentry if it was not already
748                                  * extracted */
749                                 ret = maybe_apply_dentry(dentry, args);
750                                 if (ret)
751                                         return ret;
752                                 if (progress_func &&
753                                     args->progress.extract.completed_bytes >= next_progress)
754                                 {
755                                         progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS,
756                                                       &args->progress);
757                                         if (args->progress.extract.completed_bytes >=
758                                             args->progress.extract.total_bytes)
759                                         {
760                                                 next_progress = ~0ULL;
761                                         } else {
762                                                 next_progress =
763                                                         min (args->progress.extract.completed_bytes +
764                                                              bytes_per_progress,
765                                                              args->progress.extract.total_bytes);
766                                         }
767                                 }
768                         }
769                 }
770         }
771         return 0;
772 }
773
774 static int
775 sort_stream_list_by_wim_position(struct list_head *stream_list)
776 {
777         struct list_head *cur;
778         size_t num_streams;
779         struct wim_lookup_table_entry **array;
780         size_t i;
781         size_t array_size;
782
783         num_streams = 0;
784         list_for_each(cur, stream_list)
785                 num_streams++;
786         array_size = num_streams * sizeof(array[0]);
787         array = MALLOC(array_size);
788         if (!array) {
789                 ERROR("Failed to allocate %zu bytes to sort stream entries",
790                       array_size);
791                 return WIMLIB_ERR_NOMEM;
792         }
793         cur = stream_list->next;
794         for (i = 0; i < num_streams; i++) {
795                 array[i] = container_of(cur, struct wim_lookup_table_entry, extraction_list);
796                 cur = cur->next;
797         }
798
799         qsort(array, num_streams, sizeof(array[0]), cmp_streams_by_wim_position);
800
801         INIT_LIST_HEAD(stream_list);
802         for (i = 0; i < num_streams; i++)
803                 list_add_tail(&array[i]->extraction_list, stream_list);
804         FREE(array);
805         return 0;
806 }
807
808
809 /* Extracts the image @image from the WIM @w to the directory or NTFS volume
810  * @target. */
811 static int
812 extract_single_image(WIMStruct *w, int image,
813                      const tchar *target, int extract_flags,
814                      wimlib_progress_func_t progress_func)
815 {
816         int ret;
817         struct list_head stream_list;
818
819         struct apply_args args;
820         const struct apply_operations *ops;
821
822         memset(&args, 0, sizeof(args));
823
824         args.w                  = w;
825         args.target             = target;
826         args.extract_flags      = extract_flags;
827         args.progress_func      = progress_func;
828
829         if (progress_func) {
830                 args.progress.extract.wimfile_name = w->filename;
831                 args.progress.extract.image = image;
832                 args.progress.extract.extract_flags = (extract_flags &
833                                                        WIMLIB_EXTRACT_MASK_PUBLIC);
834                 args.progress.extract.image_name = wimlib_get_image_name(w, image);
835                 args.progress.extract.target = target;
836         }
837
838 #ifdef WITH_NTFS_3G
839         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
840                 args.vol = ntfs_mount(target, 0);
841                 if (!args.vol) {
842                         ERROR_WITH_ERRNO("Failed to mount NTFS volume `%"TS"'",
843                                          target);
844                         return WIMLIB_ERR_NTFS_3G;
845                 }
846                 ops = &ntfs_apply_operations;
847         } else
848 #endif
849                 ops = &normal_apply_operations;
850
851         ret = select_wim_image(w, image);
852         if (ret)
853                 goto out;
854
855         /* Build a list of the streams that need to be extracted */
856         find_streams_for_extraction(wim_get_current_image_metadata(w),
857                                     &stream_list,
858                                     w->lookup_table, extract_flags);
859
860         /* Calculate the number of bytes of data that will be extracted */
861         calculate_bytes_to_extract(&stream_list, extract_flags,
862                                    &args.progress);
863
864         if (progress_func) {
865                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN,
866                               &args.progress);
867         }
868
869         /* If a sequential extraction was specified, sort the streams to be
870          * extracted by their position in the WIM file, so that the WIM file can
871          * be read sequentially. */
872         if (extract_flags & WIMLIB_EXTRACT_FLAG_SEQUENTIAL) {
873                 ret = sort_stream_list_by_wim_position(&stream_list);
874                 if (ret != 0) {
875                         WARNING("Falling back to non-sequential extraction");
876                         extract_flags &= ~WIMLIB_EXTRACT_FLAG_SEQUENTIAL;
877                 }
878         }
879
880         if (progress_func) {
881                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_BEGIN,
882                               &args.progress);
883         }
884
885         ret = calculate_dentry_tree_full_paths(wim_root_dentry(w));
886         if (ret)
887                 goto out;
888
889         /* Make the directory structure and extract empty files */
890         args.extract_flags |= WIMLIB_EXTRACT_FLAG_NO_STREAMS;
891         args.apply_dentry = ops->apply_dentry;
892         ret = for_dentry_in_tree(wim_root_dentry(w), maybe_apply_dentry, &args);
893         args.extract_flags &= ~WIMLIB_EXTRACT_FLAG_NO_STREAMS;
894         if (ret)
895                 goto out;
896
897         if (progress_func) {
898                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_END,
899                               &args.progress);
900         }
901
902         if (extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX) {
903                 args.target_realpath = realpath(target, NULL);
904                 if (!args.target_realpath)
905                         return WIMLIB_ERR_NOMEM;
906                 args.target_realpath_len = tstrlen(args.target_realpath);
907         }
908
909         /* Extract non-empty files */
910         ret = apply_stream_list(&stream_list, &args, ops, progress_func);
911         if (ret)
912                 goto out_free_target_realpath;
913
914         if (progress_func) {
915                 progress_func(WIMLIB_PROGRESS_MSG_APPLY_TIMESTAMPS,
916                               &args.progress);
917         }
918
919         /* Apply timestamps */
920         ret = for_dentry_in_tree_depth(wim_root_dentry(w),
921                                        ops->apply_dentry_timestamps, &args);
922         if (ret)
923                 goto out_free_target_realpath;
924
925         if (progress_func) {
926                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END,
927                               &args.progress);
928         }
929 out_free_target_realpath:
930         FREE(args.target_realpath);
931 out:
932 #ifdef WITH_NTFS_3G
933         /* Unmount the NTFS volume */
934         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
935                 if (ntfs_umount(args.vol, FALSE) != 0) {
936                         ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%"TS"'",
937                                          args.target);
938                         if (ret == 0)
939                                 ret = WIMLIB_ERR_NTFS_3G;
940                 }
941         }
942 #endif
943         return ret;
944 }
945
946 static const tchar * const filename_forbidden_chars =
947 T(
948 #ifdef __WIN32__
949 "<>:\"/\\|?*"
950 #else
951 "/"
952 #endif
953 );
954
955 /* This function checks if it is okay to use a WIM image's name as a directory
956  * name.  */
957 static bool
958 image_name_ok_as_dir(const tchar *image_name)
959 {
960         return image_name && *image_name &&
961                 !tstrpbrk(image_name, filename_forbidden_chars);
962 }
963
964 /* Extracts all images from the WIM to the directory @target, with the images
965  * placed in subdirectories named by their image names. */
966 static int
967 extract_all_images(WIMStruct *w,
968                    const tchar *target,
969                    int extract_flags,
970                    wimlib_progress_func_t progress_func)
971 {
972         size_t image_name_max_len = max(xml_get_max_image_name_len(w), 20);
973         size_t output_path_len = tstrlen(target);
974         tchar buf[output_path_len + 1 + image_name_max_len + 1];
975         int ret;
976         int image;
977         const tchar *image_name;
978
979         ret = extract_directory(NULL, target, true);
980         if (ret)
981                 return ret;
982
983         tmemcpy(buf, target, output_path_len);
984         buf[output_path_len] = T('/');
985         for (image = 1; image <= w->hdr.image_count; image++) {
986                 image_name = wimlib_get_image_name(w, image);
987                 if (image_name_ok_as_dir(image_name)) {
988                         tstrcpy(buf + output_path_len + 1, image_name);
989                 } else {
990                         /* Image name is empty, or contains forbidden
991                          * characters. */
992                         tsprintf(buf + output_path_len + 1, T("%d"), image);
993                 }
994                 ret = extract_single_image(w, image, buf, extract_flags,
995                                            progress_func);
996                 if (ret != 0)
997                         return ret;
998         }
999         return 0;
1000 }
1001
1002 /* Extracts a single image or all images from a WIM file to a directory or NTFS
1003  * volume. */
1004 WIMLIBAPI int
1005 wimlib_extract_image(WIMStruct *w,
1006                      int image,
1007                      const tchar *target,
1008                      int extract_flags,
1009                      WIMStruct **additional_swms,
1010                      unsigned num_additional_swms,
1011                      wimlib_progress_func_t progress_func)
1012 {
1013         struct wim_lookup_table *joined_tab, *w_tab_save;
1014         int ret;
1015
1016         if (!target)
1017                 return WIMLIB_ERR_INVALID_PARAM;
1018
1019         extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC;
1020
1021         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
1022                         == (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
1023                 return WIMLIB_ERR_INVALID_PARAM;
1024
1025 #ifdef __WIN32__
1026         if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
1027                 ERROR("Extracting UNIX data is not supported on Windows");
1028                 return WIMLIB_ERR_INVALID_PARAM;
1029         }
1030         if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK)) {
1031                 ERROR("Linked extraction modes are not supported on Windows");
1032                 return WIMLIB_ERR_INVALID_PARAM;
1033         }
1034 #endif
1035
1036         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
1037 #ifdef WITH_NTFS_3G
1038                 if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))) {
1039                         ERROR("Cannot specify symlink or hardlink flags when applying\n"
1040                               "        directly to a NTFS volume");
1041                         return WIMLIB_ERR_INVALID_PARAM;
1042                 }
1043                 if (image == WIMLIB_ALL_IMAGES) {
1044                         ERROR("Can only apply a single image when applying "
1045                               "directly to a NTFS volume");
1046                         return WIMLIB_ERR_INVALID_PARAM;
1047                 }
1048                 if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
1049                         ERROR("Cannot restore UNIX-specific data in the NTFS extraction mode");
1050                         return WIMLIB_ERR_INVALID_PARAM;
1051                 }
1052 #else
1053                 ERROR("wimlib was compiled without support for NTFS-3g, so");
1054                 ERROR("we cannot apply a WIM image directly to a NTFS volume");
1055                 return WIMLIB_ERR_UNSUPPORTED;
1056 #endif
1057         }
1058
1059         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
1060                               WIMLIB_EXTRACT_FLAG_RPFIX)) ==
1061                 (WIMLIB_EXTRACT_FLAG_RPFIX | WIMLIB_EXTRACT_FLAG_NORPFIX))
1062         {
1063                 ERROR("Cannot specify RPFIX and NORPFIX flags at the same time!");
1064                 return WIMLIB_ERR_INVALID_PARAM;
1065         }
1066
1067         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
1068                               WIMLIB_EXTRACT_FLAG_NORPFIX)) == 0)
1069                 if (w->hdr.flags & WIM_HDR_FLAG_RP_FIX)
1070                         extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX;
1071
1072         ret = verify_swm_set(w, additional_swms, num_additional_swms);
1073         if (ret)
1074                 return ret;
1075
1076         ret = wim_checksum_unhashed_streams(w);
1077         if (ret)
1078                 return ret;
1079
1080         if (num_additional_swms) {
1081                 ret = new_joined_lookup_table(w, additional_swms,
1082                                               num_additional_swms, &joined_tab);
1083                 if (ret)
1084                         return ret;
1085                 w_tab_save = w->lookup_table;
1086                 w->lookup_table = joined_tab;
1087         }
1088
1089         if (image == WIMLIB_ALL_IMAGES) {
1090                 extract_flags |= WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
1091                 ret = extract_all_images(w, target, extract_flags,
1092                                          progress_func);
1093         } else {
1094                 extract_flags &= ~WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
1095                 ret = extract_single_image(w, image, target, extract_flags,
1096                                            progress_func);
1097         }
1098
1099         if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
1100                              WIMLIB_EXTRACT_FLAG_HARDLINK))
1101         {
1102                 for_lookup_table_entry(w->lookup_table,
1103                                        lte_free_extracted_file,
1104                                        NULL);
1105         }
1106
1107         if (num_additional_swms) {
1108                 free_lookup_table(w->lookup_table);
1109                 w->lookup_table = w_tab_save;
1110         }
1111         return ret;
1112 }