]> wimlib.net Git - wimlib/blob - src/extract.c
d6cc9c45d75ee854382a6c9c4d4fcffc7a02bcd0
[wimlib] / src / extract.c
1 /*
2  * extract.c
3  *
4  * Support for extracting WIM images, or files or directories contained in a WIM
5  * image.
6  */
7
8 /*
9  * Copyright (C) 2012, 2013 Eric Biggers
10  *
11  * This file is part of wimlib, a library for working with WIM files.
12  *
13  * wimlib is free software; you can redistribute it and/or modify it under the
14  * terms of the GNU General Public License as published by the Free
15  * Software Foundation; either version 3 of the License, or (at your option)
16  * any later version.
17  *
18  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20  * A PARTICULAR PURPOSE. See the GNU General Public License for more
21  * details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with wimlib; if not, see http://www.gnu.org/licenses/.
25  */
26
27 #ifdef HAVE_CONFIG_H
28 #  include "config.h"
29 #endif
30
31 #ifdef __WIN32__
32 #  include "wimlib/win32_common.h" /* For GetFullPathName() */
33 #endif
34
35 #include "wimlib/apply.h"
36 #include "wimlib/dentry.h"
37 #include "wimlib/encoding.h"
38 #include "wimlib/endianness.h"
39 #include "wimlib/error.h"
40 #include "wimlib/lookup_table.h"
41 #include "wimlib/paths.h"
42 #include "wimlib/resource.h"
43 #include "wimlib/swm.h"
44 #ifdef __WIN32__
45 #  include "wimlib/win32.h" /* for realpath() equivalent */
46 #endif
47 #include "wimlib/xml.h"
48
49 #include <errno.h>
50 #include <limits.h>
51 #ifdef WITH_NTFS_3G
52 #  include <ntfs-3g/volume.h> /* for ntfs_mount(), ntfs_umount() */
53 #endif
54 #include <stdlib.h>
55 #include <sys/stat.h>
56 #include <unistd.h>
57
58 #define MAX_EXTRACT_LONG_PATH_WARNINGS 5
59
60 static int
61 do_apply_op(struct wim_dentry *dentry, struct apply_args *args,
62             int (*apply_dentry_func)(const tchar *, size_t,
63                                      struct wim_dentry *, struct apply_args *))
64 {
65         tchar *p;
66         size_t extraction_path_nchars;
67         struct wim_dentry *d;
68         LIST_HEAD(ancestor_list);
69         const tchar *target;
70         size_t target_nchars;
71
72 #ifdef __WIN32__
73         if (args->target_lowlevel_path) {
74                 target = args->target_lowlevel_path;
75                 target_nchars = args->target_lowlevel_path_nchars;
76         } else
77 #endif
78         {
79                 target = args->target;
80                 target_nchars = args->target_nchars;
81         }
82
83         extraction_path_nchars = target_nchars;
84
85         for (d = dentry; d != args->extract_root; d = d->parent) {
86                 if (d->not_extracted)
87                         return 0;
88                 extraction_path_nchars += d->extraction_name_nchars + 1;
89                 list_add(&d->tmp_list, &ancestor_list);
90         }
91
92         tchar extraction_path[extraction_path_nchars + 1];
93         p = tmempcpy(extraction_path, target, target_nchars);
94
95
96         list_for_each_entry(d, &ancestor_list, tmp_list) {
97                 *p++ = OS_PREFERRED_PATH_SEPARATOR;
98                 p = tmempcpy(p, d->extraction_name, d->extraction_name_nchars);
99         }
100         *p = T('\0');
101
102 #ifdef __WIN32__
103         /* Warn the user if the path exceeds MAX_PATH */
104
105         /* + 1 for '\0', -4 for \\?\.  */
106         if (extraction_path_nchars + 1 - 4 > MAX_PATH) {
107                 if (dentry->needs_extraction &&
108                     args->num_long_paths < MAX_EXTRACT_LONG_PATH_WARNINGS)
109                 {
110                         WARNING("Path \"%ls\" exceeds MAX_PATH and will not be accessible "
111                                 "to most Windows software", extraction_path);
112                         if (++args->num_long_paths == MAX_EXTRACT_LONG_PATH_WARNINGS)
113                                 WARNING("Suppressing further warnings about long paths");
114                 }
115         }
116 #endif
117         return (*apply_dentry_func)(extraction_path, extraction_path_nchars,
118                                     dentry, args);
119 }
120
121
122 /* Extracts a file, directory, or symbolic link from the WIM archive. */
123 static int
124 apply_dentry_normal(struct wim_dentry *dentry, void *arg)
125 {
126 #ifdef __WIN32__
127         return do_apply_op(dentry, arg, win32_do_apply_dentry);
128 #else
129         return do_apply_op(dentry, arg, unix_do_apply_dentry);
130 #endif
131 }
132
133
134 /* Apply timestamps to an extracted file or directory */
135 static int
136 apply_dentry_timestamps_normal(struct wim_dentry *dentry, void *arg)
137 {
138 #ifdef __WIN32__
139         return do_apply_op(dentry, arg, win32_do_apply_dentry_timestamps);
140 #else
141         return do_apply_op(dentry, arg, unix_do_apply_dentry_timestamps);
142 #endif
143 }
144
145 static bool
146 dentry_is_dot_or_dotdot(const struct wim_dentry *dentry)
147 {
148         const utf16lechar *file_name = dentry->file_name;
149         return file_name != NULL &&
150                 file_name[0] == cpu_to_le16('.') &&
151                 (file_name[1] == cpu_to_le16('\0') ||
152                  (file_name[1] == cpu_to_le16('.') &&
153                   file_name[2] == cpu_to_le16('\0')));
154 }
155
156 /* Extract a dentry if it hasn't already been extracted and either
157  * WIMLIB_EXTRACT_FLAG_NO_STREAMS is not specified, or the dentry is a directory
158  * and/or has no unnamed stream. */
159 static int
160 maybe_apply_dentry(struct wim_dentry *dentry, void *arg)
161 {
162         struct apply_args *args = arg;
163         int ret;
164
165         if (!dentry->needs_extraction)
166                 return 0;
167
168         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_NO_STREAMS &&
169             !dentry_is_directory(dentry) &&
170             inode_unnamed_lte_resolved(dentry->d_inode) != NULL)
171                 return 0;
172
173         if ((args->extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) &&
174              args->progress_func) {
175                 ret = calculate_dentry_full_path(dentry);
176                 if (ret)
177                         return ret;
178                 args->progress.extract.cur_path = dentry->_full_path;
179                 args->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY,
180                                     &args->progress);
181         }
182         ret = args->apply_dentry(dentry, args);
183         if (ret == 0)
184                 dentry->needs_extraction = 0;
185         return ret;
186 }
187
188 static void
189 calculate_bytes_to_extract(struct list_head *stream_list,
190                            int extract_flags,
191                            union wimlib_progress_info *progress)
192 {
193         struct wim_lookup_table_entry *lte;
194         u64 total_bytes = 0;
195         u64 num_streams = 0;
196
197         /* For each stream to be extracted... */
198         list_for_each_entry(lte, stream_list, extraction_list) {
199                 if (extract_flags &
200                     (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
201                 {
202                         /* In the symlink or hard link extraction mode, each
203                          * stream will be extracted one time regardless of how
204                          * many dentries share the stream. */
205                         wimlib_assert(!(extract_flags & WIMLIB_EXTRACT_FLAG_NTFS));
206                         if (!lte->extracted_file) {
207                                 num_streams++;
208                                 total_bytes += wim_resource_size(lte);
209                         }
210                 } else {
211                         num_streams += lte->out_refcnt;
212                         total_bytes += lte->out_refcnt * wim_resource_size(lte);
213                 }
214         }
215         progress->extract.num_streams = num_streams;
216         progress->extract.total_bytes = total_bytes;
217         progress->extract.completed_bytes = 0;
218 }
219
220 static void
221 maybe_add_stream_for_extraction(struct wim_lookup_table_entry *lte,
222                                 struct list_head *stream_list)
223 {
224         if (++lte->out_refcnt == 1) {
225                 INIT_LIST_HEAD(&lte->lte_dentry_list);
226                 list_add_tail(&lte->extraction_list, stream_list);
227         }
228 }
229
230 struct find_streams_ctx {
231         struct list_head stream_list;
232         int extract_flags;
233 };
234
235 static int
236 dentry_find_streams_to_extract(struct wim_dentry *dentry, void *_ctx)
237 {
238         struct find_streams_ctx *ctx = _ctx;
239         struct wim_inode *inode = dentry->d_inode;
240         struct wim_lookup_table_entry *lte;
241         bool dentry_added = false;
242         struct list_head *stream_list = &ctx->stream_list;
243         int extract_flags = ctx->extract_flags;
244
245         if (!dentry->needs_extraction)
246                 return 0;
247
248         lte = inode_unnamed_lte_resolved(inode);
249         if (lte) {
250                 if (!inode->i_visited)
251                         maybe_add_stream_for_extraction(lte, stream_list);
252                 list_add_tail(&dentry->extraction_stream_list, &lte->lte_dentry_list);
253                 dentry_added = true;
254         }
255
256         /* Determine whether to include alternate data stream entries or not.
257          *
258          * UNIX:  Include them if extracting using NTFS-3g.
259          *
260          * Windows: Include them undconditionally, although if the filesystem is
261          * not NTFS we won't actually be able to extract them. */
262 #if defined(WITH_NTFS_3G)
263         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS)
264 #elif defined(__WIN32__)
265         if (1)
266 #else
267         if (0)
268 #endif
269         {
270                 for (unsigned i = 0; i < inode->i_num_ads; i++) {
271                         if (inode->i_ads_entries[i].stream_name_nbytes != 0) {
272                                 lte = inode->i_ads_entries[i].lte;
273                                 if (lte) {
274                                         if (!inode->i_visited) {
275                                                 maybe_add_stream_for_extraction(lte,
276                                                                                 stream_list);
277                                         }
278                                         if (!dentry_added) {
279                                                 list_add_tail(&dentry->extraction_stream_list,
280                                                               &lte->lte_dentry_list);
281                                                 dentry_added = true;
282                                         }
283                                 }
284                         }
285                 }
286         }
287         inode->i_visited = 1;
288         return 0;
289 }
290
291 static int
292 dentry_resolve_and_zero_lte_refcnt(struct wim_dentry *dentry, void *_lookup_table)
293 {
294         struct wim_inode *inode = dentry->d_inode;
295         struct wim_lookup_table *lookup_table = _lookup_table;
296         struct wim_lookup_table_entry *lte;
297         int ret;
298
299         ret = inode_resolve_ltes(inode, lookup_table);
300         if (ret)
301                 return ret;
302         for (unsigned i = 0; i <= inode->i_num_ads; i++) {
303                 lte = inode_stream_lte_resolved(inode, i);
304                 if (lte)
305                         lte->out_refcnt = 0;
306         }
307         return 0;
308 }
309
310 static int
311 find_streams_for_extraction(struct wim_dentry *root,
312                             struct list_head *stream_list,
313                             struct wim_lookup_table *lookup_table,
314                             int extract_flags)
315 {
316         struct find_streams_ctx ctx;
317         int ret;
318
319         INIT_LIST_HEAD(&ctx.stream_list);
320         ctx.extract_flags = extract_flags;
321         ret = for_dentry_in_tree(root, dentry_resolve_and_zero_lte_refcnt, lookup_table);
322         if (ret)
323                 return ret;
324         for_dentry_in_tree(root, dentry_find_streams_to_extract, &ctx);
325         list_transfer(&ctx.stream_list, stream_list);
326         return 0;
327 }
328
/* Set of callbacks implementing one extraction mode.  Both take the dentry
 * to operate on plus an opaque argument, and return an int status. */
struct apply_operations {
        /* Extracts a dentry. */
        int (*apply_dentry)(struct wim_dentry *dentry, void *arg);

        /* Applies timestamps to an extracted dentry. */
        int (*apply_dentry_timestamps)(struct wim_dentry *dentry, void *arg);
};
333
334 static const struct apply_operations normal_apply_operations = {
335         .apply_dentry = apply_dentry_normal,
336         .apply_dentry_timestamps = apply_dentry_timestamps_normal,
337 };
338
#ifdef WITH_NTFS_3G
/* Callbacks for NTFS extraction mode; only available when built with
 * NTFS-3g support. */
static const struct apply_operations ntfs_apply_operations = {
        .apply_dentry_timestamps = apply_dentry_timestamps_ntfs,
        .apply_dentry            = apply_dentry_ntfs,
};
#endif
345
346 static int
347 apply_stream_list(struct list_head *stream_list,
348                   struct apply_args *args,
349                   const struct apply_operations *ops,
350                   wimlib_progress_func_t progress_func)
351 {
352         uint64_t bytes_per_progress = args->progress.extract.total_bytes / 100;
353         uint64_t next_progress = bytes_per_progress;
354         struct wim_lookup_table_entry *lte;
355         struct wim_dentry *dentry;
356         int ret;
357
358         /* This complicated loop is essentially looping through the dentries,
359          * although dentries may be visited more than once (if a dentry contains
360          * two different nonempty streams) or not at all (if a dentry contains
361          * no non-empty streams).
362          *
363          * The outer loop is over the distinct streams to be extracted so that
364          * sequential reading of the WIM can be implemented. */
365
366         /* For each distinct stream to be extracted */
367         list_for_each_entry(lte, stream_list, extraction_list) {
368                 /* For each dentry to be extracted that is a name for an inode
369                  * containing the stream */
370                 list_for_each_entry(dentry, &lte->lte_dentry_list, extraction_stream_list) {
371                         /* Extract the dentry if it was not already
372                          * extracted */
373                         ret = maybe_apply_dentry(dentry, args);
374                         if (ret)
375                                 return ret;
376                         if (progress_func &&
377                             args->progress.extract.completed_bytes >= next_progress)
378                         {
379                                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS,
380                                               &args->progress);
381                                 if (args->progress.extract.completed_bytes >=
382                                     args->progress.extract.total_bytes)
383                                 {
384                                         next_progress = ~0ULL;
385                                 } else {
386                                         next_progress =
387                                                 min (args->progress.extract.completed_bytes +
388                                                      bytes_per_progress,
389                                                      args->progress.extract.total_bytes);
390                                 }
391                         }
392                 }
393         }
394         return 0;
395 }
396
397 static int
398 sort_stream_list_by_wim_position(struct list_head *stream_list)
399 {
400         struct list_head *cur;
401         size_t num_streams;
402         struct wim_lookup_table_entry **array;
403         size_t i;
404         size_t array_size;
405
406         num_streams = 0;
407         list_for_each(cur, stream_list)
408                 num_streams++;
409         array_size = num_streams * sizeof(array[0]);
410         array = MALLOC(array_size);
411         if (!array) {
412                 ERROR("Failed to allocate %zu bytes to sort stream entries",
413                       array_size);
414                 return WIMLIB_ERR_NOMEM;
415         }
416         cur = stream_list->next;
417         for (i = 0; i < num_streams; i++) {
418                 array[i] = container_of(cur, struct wim_lookup_table_entry, extraction_list);
419                 cur = cur->next;
420         }
421
422         qsort(array, num_streams, sizeof(array[0]), cmp_streams_by_wim_position);
423
424         INIT_LIST_HEAD(stream_list);
425         for (i = 0; i < num_streams; i++)
426                 list_add_tail(&array[i]->extraction_list, stream_list);
427         FREE(array);
428         return 0;
429 }
430
431 /*
432  * Extract a dentry to standard output.
433  *
434  * This obviously doesn't make sense in all cases.  We return an error if the
435  * dentry does not correspond to a regular file.  Otherwise we extract the
436  * unnamed data stream only.
437  */
438 static int
439 extract_dentry_to_stdout(struct wim_dentry *dentry)
440 {
441         int ret = 0;
442         if (dentry->d_inode->i_attributes & (FILE_ATTRIBUTE_REPARSE_POINT |
443                                              FILE_ATTRIBUTE_DIRECTORY))
444         {
445                 ERROR("\"%"TS"\" is not a regular file and therefore cannot be "
446                       "extracted to standard output", dentry_full_path(dentry));
447                 ret = WIMLIB_ERR_NOT_A_REGULAR_FILE;
448         } else {
449                 struct wim_lookup_table_entry *lte;
450
451                 lte = inode_unnamed_lte_resolved(dentry->d_inode);
452                 if (lte) {
453                         ret = extract_wim_resource_to_fd(lte, STDOUT_FILENO,
454                                                          wim_resource_size(lte));
455                 }
456         }
457         return ret;
458 }
459
460 #ifdef __WIN32__
461 static const utf16lechar replacement_char = cpu_to_le16(0xfffd);
462 #else
463 static const utf16lechar replacement_char = cpu_to_le16('?');
464 #endif
465
466 static bool
467 file_name_valid(utf16lechar *name, size_t num_chars, bool fix)
468 {
469         size_t i;
470
471         if (num_chars == 0)
472                 return true;
473         for (i = 0; i < num_chars; i++) {
474                 switch (name[i]) {
475         #ifdef __WIN32__
476                 case cpu_to_le16('\\'):
477                 case cpu_to_le16(':'):
478                 case cpu_to_le16('*'):
479                 case cpu_to_le16('?'):
480                 case cpu_to_le16('"'):
481                 case cpu_to_le16('<'):
482                 case cpu_to_le16('>'):
483                 case cpu_to_le16('|'):
484         #endif
485                 case cpu_to_le16('/'):
486                 case cpu_to_le16('\0'):
487                         if (fix)
488                                 name[i] = replacement_char;
489                         else
490                                 return false;
491                 }
492         }
493
494 #ifdef __WIN32__
495         if (name[num_chars - 1] == cpu_to_le16(' ') ||
496             name[num_chars - 1] == cpu_to_le16('.'))
497         {
498                 if (fix)
499                         name[num_chars - 1] = replacement_char;
500                 else
501                         return false;
502         }
503 #endif
504         return true;
505 }
506
507 /*
508  * dentry_calculate_extraction_path-
509  *
510  * Calculate the actual filename component at which a WIM dentry will be
511  * extracted, handling invalid filenames "properly".
512  *
513  * dentry->extraction_name usually will be set the same as dentry->file_name (on
514  * UNIX, converted into the platform's multibyte encoding).  However, if the
515  * file name contains characters that are not valid on the current platform or
516  * has some other format that is not valid, leave dentry->extraction_name as
517  * NULL and clear dentry->needs_extraction to indicate that this dentry should
518  * not be extracted, unless the appropriate flag
519  * WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES is set in the extract flags, in
520  * which case a substitute filename will be created and set instead.
521  *
522  * Conflicts with case-insensitive names on Windows are handled similarly; see
523  * below.
524  */
525 static int
526 dentry_calculate_extraction_path(struct wim_dentry *dentry, void *_args)
527 {
528         struct apply_args *args = _args;
529         int ret;
530
531         dentry->needs_extraction = 1;
532
533         if (dentry == args->extract_root)
534                 return 0;
535
536         if (dentry_is_dot_or_dotdot(dentry)) {
537                 /* WIM files shouldn't contain . or .. entries.  But if they are
538                  * there, don't attempt to extract them. */
539                 WARNING("Skipping extraction of unexpected . or .. file \"%"TS"\"",
540                         dentry_full_path(dentry));
541                 goto skip_dentry;
542         }
543
544 #ifdef __WIN32__
545         struct wim_dentry *other;
546         list_for_each_entry(other, &dentry->case_insensitive_conflict_list,
547                             case_insensitive_conflict_list)
548         {
549                 if (other->needs_extraction) {
550                         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS)
551                         {
552                                 WARNING("\"%"TS"\" has the same case-insensitive "
553                                         "name as \"%"TS"\"; extracting dummy name instead",
554                                         dentry_full_path(dentry),
555                                         dentry_full_path(other));
556                                 goto out_replace;
557                         } else {
558                                 WARNING("Not extracting \"%"TS"\": has same case-insensitive "
559                                         "name as \"%"TS"\"",
560                                         dentry_full_path(dentry),
561                                         dentry_full_path(other));
562                                 goto skip_dentry;
563                         }
564                 }
565         }
566 #endif
567
568         if (file_name_valid(dentry->file_name, dentry->file_name_nbytes / 2, false)) {
569 #ifdef __WIN32__
570                 dentry->extraction_name = dentry->file_name;
571                 dentry->extraction_name_nchars = dentry->file_name_nbytes / 2;
572                 return 0;
573 #else
574                 return utf16le_to_tstr(dentry->file_name,
575                                        dentry->file_name_nbytes,
576                                        &dentry->extraction_name,
577                                        &dentry->extraction_name_nchars);
578 #endif
579         } else {
580                 if (args->extract_flags & WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES)
581                 {
582                         WARNING("\"%"TS"\" has an invalid filename "
583                                 "that is not supported on this platform; "
584                                 "extracting dummy name instead",
585                                 dentry_full_path(dentry));
586                         goto out_replace;
587                 } else {
588                         WARNING("Not extracting \"%"TS"\": has an invalid filename "
589                                 "that is not supported on this platform",
590                                 dentry_full_path(dentry));
591                         goto skip_dentry;
592                 }
593         }
594
595 out_replace:
596         {
597                 utf16lechar utf16_name_copy[dentry->file_name_nbytes / 2];
598
599                 memcpy(utf16_name_copy, dentry->file_name, dentry->file_name_nbytes);
600                 file_name_valid(utf16_name_copy, dentry->file_name_nbytes / 2, true);
601
602                 tchar *tchar_name;
603                 size_t tchar_nchars;
604         #ifdef __WIN32__
605                 tchar_name = utf16_name_copy;
606                 tchar_nchars = dentry->file_name_nbytes / 2;
607         #else
608                 ret = utf16le_to_tstr(utf16_name_copy,
609                                       dentry->file_name_nbytes,
610                                       &tchar_name, &tchar_nchars);
611                 if (ret)
612                         return ret;
613         #endif
614                 size_t fixed_name_num_chars = tchar_nchars;
615                 tchar fixed_name[tchar_nchars + 50];
616
617                 tmemcpy(fixed_name, tchar_name, tchar_nchars);
618                 fixed_name_num_chars += tsprintf(fixed_name + tchar_nchars,
619                                                  T(" (invalid filename #%lu)"),
620                                                  ++args->invalid_sequence);
621         #ifndef __WIN32__
622                 FREE(tchar_name);
623         #endif
624                 dentry->extraction_name = memdup(fixed_name, 2 * fixed_name_num_chars + 2);
625                 if (!dentry->extraction_name)
626                         return WIMLIB_ERR_NOMEM;
627                 dentry->extraction_name_nchars = fixed_name_num_chars;
628         }
629         return 0;
630 skip_dentry:
631         dentry->needs_extraction = 0;
632         dentry->not_extracted = 1;
633         return 0;
634 }
635
636 static int
637 dentry_reset_needs_extraction(struct wim_dentry *dentry, void *_ignore)
638 {
639         struct wim_inode *inode = dentry->d_inode;
640
641         dentry->needs_extraction = 0;
642         dentry->not_extracted = 0;
643         inode->i_visited = 0;
644         inode->i_dos_name_extracted = 0;
645         FREE(inode->i_extracted_file);
646         inode->i_extracted_file = NULL;
647         if ((void*)dentry->extraction_name != (void*)dentry->file_name)
648                 FREE(dentry->extraction_name);
649         dentry->extraction_name = NULL;
650         return 0;
651 }
652
653 #define WINDOWS_NT_MAX_PATH 32768
654
655 /*
656  * extract_tree - Extract a file or directory tree from the currently selected
657  *                WIM image.
658  *
659  * @wim:        WIMStruct for the WIM file, with the desired image selected
660  *              (as wim->current_image).
661  * @wim_source_path:
662  *              "Canonical" (i.e. no leading or trailing slashes, path
663  *              separators forward slashes) path inside the WIM image to
664  *              extract.  An empty string means the full image.
665  * @target:
666  *              Filesystem path to extract the file or directory tree to.
667  *
668  * @extract_flags:
669  *              WIMLIB_EXTRACT_FLAG_*.  Also, the private flag
670  *              WIMLIB_EXTRACT_FLAG_MULTI_IMAGE will be set if this is being
671  *              called through wimlib_extract_image() with WIMLIB_ALL_IMAGES as
672  *              the image.
673  *
674  * @progress_func:
675  *              If non-NULL, progress function for the extraction.  The messages
676  *              we may send in this function are:
677  *
678  *              WIMLIB_PROGRESS_MSG_EXTRACT_TREE_BEGIN or
679  *                      WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN;
680  *              WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_BEGIN;
681  *              WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_END;
682  *              WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY;
683  *              WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS;
684  *              WIMLIB_PROGRESS_MSG_APPLY_TIMESTAMPS;
685  *              WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END or
686  *                      WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END.
687  *
688  * Returns 0 on success; nonzero on failure.
689  */
690 static int
691 extract_tree(WIMStruct *wim, const tchar *wim_source_path, const tchar *target,
692              int extract_flags, wimlib_progress_func_t progress_func)
693 {
694         int ret;
695         struct list_head stream_list;
696         struct apply_args args;
697         const struct apply_operations *ops;
698         struct wim_dentry *root;
699
700         memset(&args, 0, sizeof(args));
701
702
703         args.w                      = wim;
704         args.target                 = target;
705         args.target_nchars          = tstrlen(target);
706         args.extract_flags          = extract_flags;
707         args.progress_func          = progress_func;
708
709 #ifdef __WIN32__
710         /* Work around defective behavior in Windows where paths longer than 260
711          * characters are not supported by default; instead they need to be
712          * turned into absolute paths and prefixed with "\\?\".  */
713         args.target_lowlevel_path = MALLOC(WINDOWS_NT_MAX_PATH * sizeof(wchar_t));
714         if (!args.target_lowlevel_path)
715         {
716                 ret = WIMLIB_ERR_NOMEM;
717                 goto out;
718         }
719         args.target_lowlevel_path_nchars =
720                 GetFullPathName(args.target, WINDOWS_NT_MAX_PATH - 4,
721                                 &args.target_lowlevel_path[4], NULL);
722
723         if (args.target_lowlevel_path_nchars == 0 ||
724             args.target_lowlevel_path_nchars >= WINDOWS_NT_MAX_PATH - 4)
725         {
726                 WARNING("Can't get full path name for \"%ls\"", args.target);
727                 FREE(args.target_lowlevel_path);
728                 args.target_lowlevel_path = NULL;
729         } else {
730                 wmemcpy(args.target_lowlevel_path, L"\\\\?\\", 4);
731                 args.target_lowlevel_path_nchars += 4;
732         }
733 #endif
734
735         if (progress_func) {
736                 args.progress.extract.wimfile_name = wim->filename;
737                 args.progress.extract.image = wim->current_image;
738                 args.progress.extract.extract_flags = (extract_flags &
739                                                        WIMLIB_EXTRACT_MASK_PUBLIC);
740                 args.progress.extract.image_name = wimlib_get_image_name(wim,
741                                                                          wim->current_image);
742                 args.progress.extract.extract_root_wim_source_path = wim_source_path;
743                 args.progress.extract.target = target;
744         }
745
746 #ifdef WITH_NTFS_3G
747         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
748                 args.vol = ntfs_mount(target, 0);
749                 if (!args.vol) {
750                         ERROR_WITH_ERRNO("Failed to mount NTFS volume `%"TS"'",
751                                          target);
752                         ret = WIMLIB_ERR_NTFS_3G;
753                         goto out_free_target_lowlevel_path;
754                 }
755                 ops = &ntfs_apply_operations;
756         } else
757 #endif
758                 ops = &normal_apply_operations;
759
760         root = get_dentry(wim, wim_source_path);
761         if (!root) {
762                 ERROR("Path \"%"TS"\" does not exist in WIM image %d",
763                       wim_source_path, wim->current_image);
764                 ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
765                 goto out_ntfs_umount;
766         }
767         args.extract_root = root;
768
769         /* Calculate the actual filename component of each extracted dentry, and
770          * in the process set the dentry->needs_extraction flag on dentries that
771          * will be extracted. */
772         ret = for_dentry_in_tree(root, dentry_calculate_extraction_path, &args);
773         if (ret)
774                 goto out_dentry_reset_needs_extraction;
775
776         /* Build a list of the streams that need to be extracted */
777         ret = find_streams_for_extraction(root,
778                                           &stream_list,
779                                           wim->lookup_table, extract_flags);
780         if (ret)
781                 goto out_dentry_reset_needs_extraction;
782
783         /* Calculate the number of bytes of data that will be extracted */
784         calculate_bytes_to_extract(&stream_list, extract_flags,
785                                    &args.progress);
786
787         if (extract_flags & WIMLIB_EXTRACT_FLAG_TO_STDOUT) {
788                 ret = extract_dentry_to_stdout(root);
789                 goto out_dentry_reset_needs_extraction;
790         }
791
792         if (progress_func) {
793                 progress_func(*wim_source_path ? WIMLIB_PROGRESS_MSG_EXTRACT_TREE_BEGIN :
794                               WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN,
795                               &args.progress);
796         }
797
798         /* If a sequential extraction was specified, sort the streams to be
799          * extracted by their position in the WIM file, so that the WIM file can
800          * be read sequentially. */
801         if (extract_flags & WIMLIB_EXTRACT_FLAG_SEQUENTIAL) {
802                 ret = sort_stream_list_by_wim_position(&stream_list);
803                 if (ret != 0) {
804                         WARNING("Falling back to non-sequential extraction");
805                         extract_flags &= ~WIMLIB_EXTRACT_FLAG_SEQUENTIAL;
806                 }
807         }
808
809         if (progress_func) {
810                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_BEGIN,
811                               &args.progress);
812         }
813
814         /* Make the directory structure and extract empty files */
815         args.extract_flags |= WIMLIB_EXTRACT_FLAG_NO_STREAMS;
816         args.apply_dentry = ops->apply_dentry;
817         ret = for_dentry_in_tree(root, maybe_apply_dentry, &args);
818         args.extract_flags &= ~WIMLIB_EXTRACT_FLAG_NO_STREAMS;
819         if (ret)
820                 goto out_dentry_reset_needs_extraction;
821
822         if (progress_func) {
823                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_END,
824                               &args.progress);
825         }
826
827         if (extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX) {
828                 args.target_realpath = realpath(target, NULL);
829                 if (!args.target_realpath) {
830                         ret = WIMLIB_ERR_NOMEM;
831                         goto out_dentry_reset_needs_extraction;
832                 }
833                 args.target_realpath_len = tstrlen(args.target_realpath);
834         }
835
836         /* Extract non-empty files */
837         ret = apply_stream_list(&stream_list, &args, ops, progress_func);
838         if (ret)
839                 goto out_free_target_realpath;
840
841         if (progress_func) {
842                 progress_func(WIMLIB_PROGRESS_MSG_APPLY_TIMESTAMPS,
843                               &args.progress);
844         }
845
846         /* Apply timestamps */
847         ret = for_dentry_in_tree_depth(root,
848                                        ops->apply_dentry_timestamps, &args);
849         if (ret)
850                 goto out_free_target_realpath;
851
852         if (progress_func) {
853                 progress_func(*wim_source_path ? WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END :
854                               WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END,
855                               &args.progress);
856         }
857 out_free_target_realpath:
858         FREE(args.target_realpath);
859 out_dentry_reset_needs_extraction:
860         for_dentry_in_tree(root, dentry_reset_needs_extraction, NULL);
861 out_ntfs_umount:
862 #ifdef WITH_NTFS_3G
863         /* Unmount the NTFS volume */
864         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
865                 if (ntfs_umount(args.vol, FALSE) != 0) {
866                         ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%"TS"'",
867                                          args.target);
868                         if (ret == 0)
869                                 ret = WIMLIB_ERR_NTFS_3G;
870                 }
871         }
872 #endif
873 out_free_target_lowlevel_path:
874 #ifdef __WIN32__
875         FREE(args.target_lowlevel_path);
876 #endif
877 out:
878         return ret;
879 }
880
881 /* Validates a single wimlib_extract_command, mostly checking to make sure the
882  * extract flags make sense. */
883 static int
884 check_extract_command(struct wimlib_extract_command *cmd, int wim_header_flags)
885 {
886         int extract_flags;
887         bool is_entire_image = (cmd->wim_source_path[0] == T('\0'));
888
889         /* Empty destination path? */
890         if (cmd->fs_dest_path[0] == T('\0'))
891                 return WIMLIB_ERR_INVALID_PARAM;
892
893         extract_flags = cmd->extract_flags;
894
895         /* Specified both symlink and hardlink modes? */
896         if ((extract_flags &
897              (WIMLIB_EXTRACT_FLAG_SYMLINK |
898               WIMLIB_EXTRACT_FLAG_HARDLINK)) == (WIMLIB_EXTRACT_FLAG_SYMLINK |
899                                                  WIMLIB_EXTRACT_FLAG_HARDLINK))
900                 return WIMLIB_ERR_INVALID_PARAM;
901
902 #ifdef __WIN32__
903         /* Wanted UNIX data on Windows? */
904         if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
905                 ERROR("Extracting UNIX data is not supported on Windows");
906                 return WIMLIB_ERR_INVALID_PARAM;
907         }
908         /* Wanted linked extraction on Windows?  (XXX This is possible, just not
909          * implemented yet.) */
910         if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
911                              WIMLIB_EXTRACT_FLAG_HARDLINK))
912         {
913                 ERROR("Linked extraction modes are not supported on Windows");
914                 return WIMLIB_ERR_INVALID_PARAM;
915         }
916 #endif
917
918         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
919                 /* NTFS-3g extraction mode requested */
920 #ifdef WITH_NTFS_3G
921                 if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
922                                       WIMLIB_EXTRACT_FLAG_HARDLINK))) {
923                         ERROR("Cannot specify symlink or hardlink flags when applying\n"
924                               "        directly to a NTFS volume");
925                         return WIMLIB_ERR_INVALID_PARAM;
926                 }
927                 if (!is_entire_image &&
928                     (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS))
929                 {
930                         ERROR("When applying directly to a NTFS volume you can "
931                               "only extract a full image, not part of one");
932                         return WIMLIB_ERR_INVALID_PARAM;
933                 }
934                 if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
935                         ERROR("Cannot restore UNIX-specific data in "
936                               "the NTFS extraction mode");
937                         return WIMLIB_ERR_INVALID_PARAM;
938                 }
939 #else
940                 ERROR("wimlib was compiled without support for NTFS-3g, so");
941                 ERROR("we cannot apply a WIM image directly to a NTFS volume");
942                 return WIMLIB_ERR_UNSUPPORTED;
943 #endif
944         }
945
946         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
947                               WIMLIB_EXTRACT_FLAG_NORPFIX)) ==
948                 (WIMLIB_EXTRACT_FLAG_RPFIX | WIMLIB_EXTRACT_FLAG_NORPFIX))
949         {
950                 ERROR("Cannot specify RPFIX and NORPFIX flags at the same time!");
951                 return WIMLIB_ERR_INVALID_PARAM;
952         }
953
954         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
955                               WIMLIB_EXTRACT_FLAG_NORPFIX)) == 0)
956         {
957                 /* Do reparse point fixups by default if the WIM header says
958                  * they are enabled and we are extracting a full image. */
959                 if ((wim_header_flags & WIM_HDR_FLAG_RP_FIX) && is_entire_image)
960                         extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX;
961         }
962
963         if (!is_entire_image && (extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX)) {
964                 ERROR("Cannot specify --rpfix when not extracting entire image");
965                 return WIMLIB_ERR_INVALID_PARAM;
966         }
967
968         cmd->extract_flags = extract_flags;
969         return 0;
970 }
971
972
973 /* Internal function to execute extraction commands for a WIM image. */
974 static int
975 do_wimlib_extract_files(WIMStruct *wim,
976                         int image,
977                         struct wimlib_extract_command *cmds,
978                         size_t num_cmds,
979                         wimlib_progress_func_t progress_func)
980 {
981         int ret;
982         bool found_link_cmd = false;
983         bool found_nolink_cmd = false;
984
985         /* Select the image from which we are extracting files */
986         ret = select_wim_image(wim, image);
987         if (ret)
988                 return ret;
989
990         /* Make sure there are no streams in the WIM that have not been
991          * checksummed yet. */
992         ret = wim_checksum_unhashed_streams(wim);
993         if (ret)
994                 return ret;
995
996         /* Check for problems with the extraction commands */
997         for (size_t i = 0; i < num_cmds; i++) {
998                 ret = check_extract_command(&cmds[i], wim->hdr.flags);
999                 if (ret)
1000                         return ret;
1001                 if (cmds[i].extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
1002                                              WIMLIB_EXTRACT_FLAG_HARDLINK)) {
1003                         found_link_cmd = true;
1004                 } else {
1005                         found_nolink_cmd = true;
1006                 }
1007                 if (found_link_cmd && found_nolink_cmd) {
1008                         ERROR("Symlink or hardlink extraction mode must "
1009                               "be set on all extraction commands");
1010                         return WIMLIB_ERR_INVALID_PARAM;
1011                 }
1012         }
1013
1014         /* Execute the extraction commands */
1015         for (size_t i = 0; i < num_cmds; i++) {
1016                 ret = extract_tree(wim,
1017                                    cmds[i].wim_source_path,
1018                                    cmds[i].fs_dest_path,
1019                                    cmds[i].extract_flags,
1020                                    progress_func);
1021                 if (ret)
1022                         return ret;
1023         }
1024         return 0;
1025 }
1026
1027 /* Extract files or directories from a WIM image. */
1028 WIMLIBAPI int
1029 wimlib_extract_files(WIMStruct *wim,
1030                      int image,
1031                      const struct wimlib_extract_command *cmds,
1032                      size_t num_cmds,
1033                      int default_extract_flags,
1034                      WIMStruct **additional_swms,
1035                      unsigned num_additional_swms,
1036                      wimlib_progress_func_t progress_func)
1037 {
1038         int ret;
1039         struct wimlib_extract_command *cmds_copy;
1040         int all_flags = 0;
1041
1042         default_extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC;
1043
1044         ret = verify_swm_set(wim, additional_swms, num_additional_swms);
1045         if (ret)
1046                 goto out;
1047
1048         if (num_cmds == 0)
1049                 goto out;
1050
1051         if (num_additional_swms)
1052                 merge_lookup_tables(wim, additional_swms, num_additional_swms);
1053
1054         cmds_copy = CALLOC(num_cmds, sizeof(cmds[0]));
1055         if (!cmds_copy) {
1056                 ret = WIMLIB_ERR_NOMEM;
1057                 goto out_restore_lookup_table;
1058         }
1059
1060         for (size_t i = 0; i < num_cmds; i++) {
1061                 cmds_copy[i].extract_flags = (default_extract_flags |
1062                                                  cmds[i].extract_flags)
1063                                                 & WIMLIB_EXTRACT_MASK_PUBLIC;
1064                 all_flags |= cmds_copy[i].extract_flags;
1065
1066                 cmds_copy[i].wim_source_path = canonicalize_wim_path(cmds[i].wim_source_path);
1067                 if (!cmds_copy[i].wim_source_path) {
1068                         ret = WIMLIB_ERR_NOMEM;
1069                         goto out_free_cmds_copy;
1070                 }
1071
1072                 cmds_copy[i].fs_dest_path = canonicalize_fs_path(cmds[i].fs_dest_path);
1073                 if (!cmds_copy[i].fs_dest_path) {
1074                         ret = WIMLIB_ERR_NOMEM;
1075                         goto out_free_cmds_copy;
1076                 }
1077
1078         }
1079         ret = do_wimlib_extract_files(wim, image,
1080                                       cmds_copy, num_cmds,
1081                                       progress_func);
1082
1083         if (all_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
1084                          WIMLIB_EXTRACT_FLAG_HARDLINK))
1085         {
1086                 for_lookup_table_entry(wim->lookup_table,
1087                                        lte_free_extracted_file, NULL);
1088         }
1089 out_free_cmds_copy:
1090         for (size_t i = 0; i < num_cmds; i++) {
1091                 FREE(cmds_copy[i].wim_source_path);
1092                 FREE(cmds_copy[i].fs_dest_path);
1093         }
1094         FREE(cmds_copy);
1095 out_restore_lookup_table:
1096         if (num_additional_swms)
1097                 unmerge_lookup_table(wim);
1098 out:
1099         return ret;
1100 }
1101
1102 /*
1103  * Extracts an image from a WIM file.
1104  *
1105  * @wim:                WIMStruct for the WIM file.
1106  *
1107  * @image:              Number of the single image to extract.
1108  *
1109  * @target:             Directory or NTFS volume to extract the image to.
1110  *
1111  * @extract_flags:      Bitwise or of WIMLIB_EXTRACT_FLAG_*.
1112  *
1113  * @progress_func:      If non-NULL, a progress function to be called
1114  *                      periodically.
1115  *
1116  * Returns 0 on success; nonzero on failure.
1117  */
1118 static int
1119 extract_single_image(WIMStruct *wim, int image,
1120                      const tchar *target, int extract_flags,
1121                      wimlib_progress_func_t progress_func)
1122 {
1123         int ret;
1124         tchar *target_copy = canonicalize_fs_path(target);
1125         if (!target_copy)
1126                 return WIMLIB_ERR_NOMEM;
1127         struct wimlib_extract_command cmd = {
1128                 .wim_source_path = T(""),
1129                 .fs_dest_path = target_copy,
1130                 .extract_flags = extract_flags,
1131         };
1132         ret = do_wimlib_extract_files(wim, image, &cmd, 1, progress_func);
1133         FREE(target_copy);
1134         return ret;
1135 }
1136
1137 static const tchar * const filename_forbidden_chars =
1138 T(
1139 #ifdef __WIN32__
1140 "<>:\"/\\|?*"
1141 #else
1142 "/"
1143 #endif
1144 );
1145
1146 /* This function checks if it is okay to use a WIM image's name as a directory
1147  * name.  */
1148 static bool
1149 image_name_ok_as_dir(const tchar *image_name)
1150 {
1151         return image_name && *image_name &&
1152                 !tstrpbrk(image_name, filename_forbidden_chars) &&
1153                 tstrcmp(image_name, T(".")) &&
1154                 tstrcmp(image_name, T(".."));
1155 }
1156
1157 /* Extracts all images from the WIM to the directory @target, with the images
1158  * placed in subdirectories named by their image names. */
1159 static int
1160 extract_all_images(WIMStruct *wim,
1161                    const tchar *target,
1162                    int extract_flags,
1163                    wimlib_progress_func_t progress_func)
1164 {
1165         size_t image_name_max_len = max(xml_get_max_image_name_len(wim), 20);
1166         size_t output_path_len = tstrlen(target);
1167         tchar buf[output_path_len + 1 + image_name_max_len + 1];
1168         int ret;
1169         int image;
1170         const tchar *image_name;
1171         struct stat stbuf;
1172
1173         if (tstat(target, &stbuf)) {
1174                 if (errno == ENOENT)
1175                 {
1176                         if (tmkdir(target, S_IRWXU | S_IRGRP | S_IXGRP |
1177                                            S_IROTH | S_IXOTH))
1178                         {
1179                                 ERROR_WITH_ERRNO("Failed to create directory \"%"TS"\"", target);
1180                                 return WIMLIB_ERR_MKDIR;
1181                         }
1182                 } else {
1183                         ERROR_WITH_ERRNO("Failed to stat \"%"TS"\"", target);
1184                         return WIMLIB_ERR_STAT;
1185                 }
1186         } else if (!S_ISDIR(stbuf.st_mode)) {
1187                 ERROR("\"%"TS"\" is not a directory", target);
1188                 return WIMLIB_ERR_NOTDIR;
1189         }
1190
1191         tmemcpy(buf, target, output_path_len);
1192         buf[output_path_len] = OS_PREFERRED_PATH_SEPARATOR;
1193         for (image = 1; image <= wim->hdr.image_count; image++) {
1194                 image_name = wimlib_get_image_name(wim, image);
1195                 if (image_name_ok_as_dir(image_name)) {
1196                         tstrcpy(buf + output_path_len + 1, image_name);
1197                 } else {
1198                         /* Image name is empty or contains forbidden characters.
1199                          * Use image number instead. */
1200                         tsprintf(buf + output_path_len + 1, T("%d"), image);
1201                 }
1202                 ret = extract_single_image(wim, image, buf, extract_flags,
1203                                            progress_func);
1204                 if (ret)
1205                         return ret;
1206         }
1207         return 0;
1208 }
1209
1210 /* Extracts a single image or all images from a WIM file to a directory or NTFS
1211  * volume. */
1212 WIMLIBAPI int
1213 wimlib_extract_image(WIMStruct *wim,
1214                      int image,
1215                      const tchar *target,
1216                      int extract_flags,
1217                      WIMStruct **additional_swms,
1218                      unsigned num_additional_swms,
1219                      wimlib_progress_func_t progress_func)
1220 {
1221         int ret;
1222
1223         extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC;
1224
1225         ret = verify_swm_set(wim, additional_swms, num_additional_swms);
1226         if (ret)
1227                 return ret;
1228
1229         if (num_additional_swms)
1230                 merge_lookup_tables(wim, additional_swms, num_additional_swms);
1231
1232         if (image == WIMLIB_ALL_IMAGES) {
1233                 ret = extract_all_images(wim, target,
1234                                          extract_flags | WIMLIB_EXTRACT_FLAG_MULTI_IMAGE,
1235                                          progress_func);
1236         } else {
1237                 ret = extract_single_image(wim, image, target, extract_flags,
1238                                            progress_func);
1239         }
1240
1241         if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
1242                              WIMLIB_EXTRACT_FLAG_HARDLINK))
1243         {
1244                 for_lookup_table_entry(wim->lookup_table,
1245                                        lte_free_extracted_file,
1246                                        NULL);
1247         }
1248         if (num_additional_swms)
1249                 unmerge_lookup_table(wim);
1250         return ret;
1251 }