Improve handling of invalid filenames
[wimlib] / src / extract.c
1 /*
2  * extract.c
3  *
4  * Support for extracting WIM images, or files or directories contained in a WIM
5  * image.
6  */
7
8 /*
9  * Copyright (C) 2012, 2013 Eric Biggers
10  *
11  * This file is part of wimlib, a library for working with WIM files.
12  *
13  * wimlib is free software; you can redistribute it and/or modify it under the
14  * terms of the GNU General Public License as published by the Free
15  * Software Foundation; either version 3 of the License, or (at your option)
16  * any later version.
17  *
18  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20  * A PARTICULAR PURPOSE. See the GNU General Public License for more
21  * details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with wimlib; if not, see http://www.gnu.org/licenses/.
25  */
26
27 #ifdef HAVE_CONFIG_H
28 #  include "config.h"
29 #endif
30
31 #include "wimlib/apply.h"
32 #include "wimlib/dentry.h"
33 #include "wimlib/encoding.h"
34 #include "wimlib/endianness.h"
35 #include "wimlib/error.h"
36 #include "wimlib/lookup_table.h"
37 #include "wimlib/paths.h"
38 #include "wimlib/resource.h"
39 #include "wimlib/swm.h"
40 #ifdef __WIN32__
41 #  include "wimlib/win32.h" /* for realpath() equivalent */
42 #endif
43 #include "wimlib/xml.h"
44
45 #include <errno.h>
46 #include <limits.h>
47 #ifdef WITH_NTFS_3G
48 #  include <ntfs-3g/volume.h> /* for ntfs_mount(), ntfs_umount() */
49 #endif
50 #include <stdlib.h>
51 #include <sys/stat.h>
52 #include <unistd.h>
53
54 static int
55 do_apply_op(struct wim_dentry *dentry, struct apply_args *args,
56             int (*apply_dentry_func)(const tchar *, size_t,
57                                      struct wim_dentry *, struct apply_args *))
58 {
59         tchar *p;
60         size_t extraction_path_nchars;
61         struct wim_dentry *d;
62         LIST_HEAD(ancestor_list);
63
64         extraction_path_nchars = args->target_nchars;
65
66         for (d = dentry; d != args->extract_root; d = d->parent) {
67                 if (d->not_extracted)
68                         return 0;
69                 extraction_path_nchars += d->extraction_name_nchars + 1;
70                 list_add(&d->tmp_list, &ancestor_list);
71         }
72
73         tchar extraction_path[extraction_path_nchars + 1];
74         p = tmempcpy(extraction_path, args->target, args->target_nchars);
75
76         list_for_each_entry(d, &ancestor_list, tmp_list) {
77                 *p++ = OS_PREFERRED_PATH_SEPARATOR;
78                 p = tmempcpy(p, d->extraction_name, d->extraction_name_nchars);
79         }
80         *p = T('\0');
81         return (*apply_dentry_func)(extraction_path, extraction_path_nchars,
82                                     dentry, args);
83 }
84
85
86 /* Extracts a file, directory, or symbolic link from the WIM archive. */
87 static int
88 apply_dentry_normal(struct wim_dentry *dentry, void *arg)
89 {
90 #ifdef __WIN32__
91         return do_apply_op(dentry, arg, win32_do_apply_dentry);
92 #else
93         return do_apply_op(dentry, arg, unix_do_apply_dentry);
94 #endif
95 }
96
97
98 /* Apply timestamps to an extracted file or directory */
99 static int
100 apply_dentry_timestamps_normal(struct wim_dentry *dentry, void *arg)
101 {
102 #ifdef __WIN32__
103         return do_apply_op(dentry, arg, win32_do_apply_dentry_timestamps);
104 #else
105         return do_apply_op(dentry, arg, unix_do_apply_dentry_timestamps);
106 #endif
107 }
108
109 static bool
110 dentry_is_dot_or_dotdot(const struct wim_dentry *dentry)
111 {
112         const utf16lechar *file_name = dentry->file_name;
113         return file_name != NULL &&
114                 file_name[0] == cpu_to_le16('.') &&
115                 (file_name[1] == cpu_to_le16('\0') ||
116                  (file_name[1] == cpu_to_le16('.') &&
117                   file_name[2] == cpu_to_le16('\0')));
118 }
119
120 /* Extract a dentry if it hasn't already been extracted and either
121  * WIMLIB_EXTRACT_FLAG_NO_STREAMS is not specified, or the dentry is a directory
122  * and/or has no unnamed stream. */
123 static int
124 maybe_apply_dentry(struct wim_dentry *dentry, void *arg)
125 {
126         struct apply_args *args = arg;
127         int ret;
128
129         if (!dentry->needs_extraction)
130                 return 0;
131
132         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_NO_STREAMS &&
133             !dentry_is_directory(dentry) &&
134             inode_unnamed_lte_resolved(dentry->d_inode) != NULL)
135                 return 0;
136
137         if ((args->extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) &&
138              args->progress_func) {
139                 ret = calculate_dentry_full_path(dentry);
140                 if (ret)
141                         return ret;
142                 args->progress.extract.cur_path = dentry->_full_path;
143                 args->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY,
144                                     &args->progress);
145         }
146         ret = args->apply_dentry(dentry, args);
147         if (ret == 0)
148                 dentry->needs_extraction = 0;
149         return ret;
150 }
151
152 static void
153 calculate_bytes_to_extract(struct list_head *stream_list,
154                            int extract_flags,
155                            union wimlib_progress_info *progress)
156 {
157         struct wim_lookup_table_entry *lte;
158         u64 total_bytes = 0;
159         u64 num_streams = 0;
160
161         /* For each stream to be extracted... */
162         list_for_each_entry(lte, stream_list, extraction_list) {
163                 if (extract_flags &
164                     (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
165                 {
166                         /* In the symlink or hard link extraction mode, each
167                          * stream will be extracted one time regardless of how
168                          * many dentries share the stream. */
169                         wimlib_assert(!(extract_flags & WIMLIB_EXTRACT_FLAG_NTFS));
170                         if (!lte->extracted_file) {
171                                 num_streams++;
172                                 total_bytes += wim_resource_size(lte);
173                         }
174                 } else {
175                         num_streams += lte->out_refcnt;
176                         total_bytes += lte->out_refcnt * wim_resource_size(lte);
177                 }
178         }
179         progress->extract.num_streams = num_streams;
180         progress->extract.total_bytes = total_bytes;
181         progress->extract.completed_bytes = 0;
182 }
183
184 static void
185 maybe_add_stream_for_extraction(struct wim_lookup_table_entry *lte,
186                                 struct list_head *stream_list)
187 {
188         if (++lte->out_refcnt == 1) {
189                 INIT_LIST_HEAD(&lte->lte_dentry_list);
190                 list_add_tail(&lte->extraction_list, stream_list);
191         }
192 }
193
/* Context handed to dentry_find_streams_to_extract() while walking the dentry
 * tree. */
struct find_streams_ctx {
        /* Streams found so far, linked through their extraction_list member. */
        struct list_head stream_list;
        /* WIMLIB_EXTRACT_FLAG_* bits for the extraction. */
        int extract_flags;
};
198
199 static int
200 dentry_find_streams_to_extract(struct wim_dentry *dentry, void *_ctx)
201 {
202         struct find_streams_ctx *ctx = _ctx;
203         struct wim_inode *inode = dentry->d_inode;
204         struct wim_lookup_table_entry *lte;
205         bool dentry_added = false;
206         struct list_head *stream_list = &ctx->stream_list;
207         int extract_flags = ctx->extract_flags;
208
209         if (!dentry->needs_extraction)
210                 return 0;
211
212         lte = inode_unnamed_lte_resolved(inode);
213         if (lte) {
214                 if (!inode->i_visited)
215                         maybe_add_stream_for_extraction(lte, stream_list);
216                 list_add_tail(&dentry->extraction_stream_list, &lte->lte_dentry_list);
217                 dentry_added = true;
218         }
219
220         /* Determine whether to include alternate data stream entries or not.
221          *
222          * UNIX:  Include them if extracting using NTFS-3g.
223          *
224          * Windows: Include them undconditionally, although if the filesystem is
225          * not NTFS we won't actually be able to extract them. */
226 #if defined(WITH_NTFS_3G)
227         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS)
228 #elif defined(__WIN32__)
229         if (1)
230 #else
231         if (0)
232 #endif
233         {
234                 for (unsigned i = 0; i < inode->i_num_ads; i++) {
235                         if (inode->i_ads_entries[i].stream_name_nbytes != 0) {
236                                 lte = inode->i_ads_entries[i].lte;
237                                 if (lte) {
238                                         if (!inode->i_visited) {
239                                                 maybe_add_stream_for_extraction(lte,
240                                                                                 stream_list);
241                                         }
242                                         if (!dentry_added) {
243                                                 list_add_tail(&dentry->extraction_stream_list,
244                                                               &lte->lte_dentry_list);
245                                                 dentry_added = true;
246                                         }
247                                 }
248                         }
249                 }
250         }
251         inode->i_visited = 1;
252         return 0;
253 }
254
255 static int
256 dentry_resolve_and_zero_lte_refcnt(struct wim_dentry *dentry, void *_lookup_table)
257 {
258         struct wim_inode *inode = dentry->d_inode;
259         struct wim_lookup_table *lookup_table = _lookup_table;
260         struct wim_lookup_table_entry *lte;
261
262         inode_resolve_ltes(inode, lookup_table);
263         for (unsigned i = 0; i <= inode->i_num_ads; i++) {
264                 lte = inode_stream_lte_resolved(inode, i);
265                 if (lte)
266                         lte->out_refcnt = 0;
267         }
268         return 0;
269 }
270
271 static void
272 find_streams_for_extraction(struct wim_dentry *root,
273                             struct list_head *stream_list,
274                             struct wim_lookup_table *lookup_table,
275                             int extract_flags)
276 {
277         struct find_streams_ctx ctx;
278
279         INIT_LIST_HEAD(&ctx.stream_list);
280         ctx.extract_flags = extract_flags;
281         for_dentry_in_tree(root, dentry_resolve_and_zero_lte_refcnt, lookup_table);
282         for_dentry_in_tree(root, dentry_find_streams_to_extract, &ctx);
283         list_transfer(&ctx.stream_list, stream_list);
284 }
285
/* The pair of callbacks that make up one extraction backend.  Both receive
 * the dentry to operate on and, as @arg, the struct apply_args of the
 * extraction, and return 0 on success or nonzero on failure. */
struct apply_operations {
        /* Extract the dentry itself. */
        int (*apply_dentry)(struct wim_dentry *dentry, void *arg);
        /* Apply timestamps to the extracted dentry. */
        int (*apply_dentry_timestamps)(struct wim_dentry *dentry, void *arg);
};
290
/* Backend used when not extracting through NTFS-3g: dispatches to the win32_*
 * or unix_* implementations via do_apply_op(). */
static const struct apply_operations normal_apply_operations = {
        .apply_dentry = apply_dentry_normal,
        .apply_dentry_timestamps = apply_dentry_timestamps_normal,
};
295
#ifdef WITH_NTFS_3G
/* Backend selected when WIMLIB_EXTRACT_FLAG_NTFS is given; only available
 * when wimlib is built with NTFS-3g support. */
static const struct apply_operations ntfs_apply_operations = {
        .apply_dentry = apply_dentry_ntfs,
        .apply_dentry_timestamps = apply_dentry_timestamps_ntfs,
};
#endif
302
303 static int
304 apply_stream_list(struct list_head *stream_list,
305                   struct apply_args *args,
306                   const struct apply_operations *ops,
307                   wimlib_progress_func_t progress_func)
308 {
309         uint64_t bytes_per_progress = args->progress.extract.total_bytes / 100;
310         uint64_t next_progress = bytes_per_progress;
311         struct wim_lookup_table_entry *lte;
312         struct wim_dentry *dentry;
313         int ret;
314
315         /* This complicated loop is essentially looping through the dentries,
316          * although dentries may be visited more than once (if a dentry contains
317          * two different nonempty streams) or not at all (if a dentry contains
318          * no non-empty streams).
319          *
320          * The outer loop is over the distinct streams to be extracted so that
321          * sequential reading of the WIM can be implemented. */
322
323         /* For each distinct stream to be extracted */
324         list_for_each_entry(lte, stream_list, extraction_list) {
325                 /* For each dentry to be extracted that is a name for an inode
326                  * containing the stream */
327                 list_for_each_entry(dentry, &lte->lte_dentry_list, extraction_stream_list) {
328                         /* Extract the dentry if it was not already
329                          * extracted */
330                         ret = maybe_apply_dentry(dentry, args);
331                         if (ret)
332                                 return ret;
333                         if (progress_func &&
334                             args->progress.extract.completed_bytes >= next_progress)
335                         {
336                                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS,
337                                               &args->progress);
338                                 if (args->progress.extract.completed_bytes >=
339                                     args->progress.extract.total_bytes)
340                                 {
341                                         next_progress = ~0ULL;
342                                 } else {
343                                         next_progress =
344                                                 min (args->progress.extract.completed_bytes +
345                                                      bytes_per_progress,
346                                                      args->progress.extract.total_bytes);
347                                 }
348                         }
349                 }
350         }
351         return 0;
352 }
353
354 static int
355 sort_stream_list_by_wim_position(struct list_head *stream_list)
356 {
357         struct list_head *cur;
358         size_t num_streams;
359         struct wim_lookup_table_entry **array;
360         size_t i;
361         size_t array_size;
362
363         num_streams = 0;
364         list_for_each(cur, stream_list)
365                 num_streams++;
366         array_size = num_streams * sizeof(array[0]);
367         array = MALLOC(array_size);
368         if (!array) {
369                 ERROR("Failed to allocate %zu bytes to sort stream entries",
370                       array_size);
371                 return WIMLIB_ERR_NOMEM;
372         }
373         cur = stream_list->next;
374         for (i = 0; i < num_streams; i++) {
375                 array[i] = container_of(cur, struct wim_lookup_table_entry, extraction_list);
376                 cur = cur->next;
377         }
378
379         qsort(array, num_streams, sizeof(array[0]), cmp_streams_by_wim_position);
380
381         INIT_LIST_HEAD(stream_list);
382         for (i = 0; i < num_streams; i++)
383                 list_add_tail(&array[i]->extraction_list, stream_list);
384         FREE(array);
385         return 0;
386 }
387
388 /*
389  * Extract a dentry to standard output.
390  *
391  * This obviously doesn't make sense in all cases.  We return an error if the
392  * dentry does not correspond to a regular file.  Otherwise we extract the
393  * unnamed data stream only.
394  */
395 static int
396 extract_dentry_to_stdout(struct wim_dentry *dentry)
397 {
398         int ret = 0;
399         if (!dentry_is_regular_file(dentry)) {
400                 ERROR("\"%"TS"\" is not a regular file and therefore cannot be "
401                       "extracted to standard output", dentry->_full_path);
402                 ret = WIMLIB_ERR_NOT_A_REGULAR_FILE;
403         } else {
404                 struct wim_lookup_table_entry *lte;
405
406                 lte = inode_unnamed_lte_resolved(dentry->d_inode);
407                 if (lte) {
408                         ret = extract_wim_resource_to_fd(lte, STDOUT_FILENO,
409                                                          wim_resource_size(lte));
410                 }
411         }
412         return ret;
413 }
414
/* Character substituted for invalid characters by file_name_valid() when it
 * is asked to fix a name: U+FFFD on Windows, '?' elsewhere. */
#ifdef __WIN32__
static const utf16lechar replacement_char = cpu_to_le16(0xfffd);
#else
static const utf16lechar replacement_char = cpu_to_le16('?');
#endif
420
421 static bool
422 file_name_valid(utf16lechar *name, size_t num_chars, bool fix)
423 {
424         size_t i;
425
426         if (num_chars == 0)
427                 return true;
428         for (i = 0; i < num_chars; i++) {
429                 switch (name[i]) {
430         #ifdef __WIN32__
431                 case cpu_to_le16('\\'):
432                 case cpu_to_le16(':'):
433                 case cpu_to_le16('*'):
434                 case cpu_to_le16('?'):
435                 case cpu_to_le16('"'):
436                 case cpu_to_le16('<'):
437                 case cpu_to_le16('>'):
438                 case cpu_to_le16('|'):
439         #endif
440                 case cpu_to_le16('/'):
441                 case cpu_to_le16('\0'):
442                         if (fix)
443                                 name[i] = replacement_char;
444                         else
445                                 return false;
446                 }
447         }
448
449         if (name[num_chars - 1] == cpu_to_le16(' ') ||
450             name[num_chars - 1] == cpu_to_le16('.'))
451         {
452                 if (fix)
453                         name[num_chars - 1] = replacement_char;
454                 else
455                         return false;
456         }
457         return true;
458 }
459
460 /*
461  * dentry_calculate_extraction_path-
462  *
463  * Calculate the actual filename component at which a WIM dentry will be
464  * extracted, handling invalid filenames "properly".
465  *
466  * dentry->extraction_name usually will be set the same as dentry->file_name (on
467  * UNIX, converted into the platform's multibyte encoding).  However, if the
468  * file name contains characters that are not valid on the current platform or
469  * has some other format that is not valid, leave dentry->extraction_name as
470  * NULL and clear dentry->needs_extraction to indicate that this dentry should
471  * not be extracted, unless the appropriate flag
472  * WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES is set in the extract flags, in
473  * which case a substitute filename will be created and set instead.
474  *
475  * Conflicts with case-insensitive names on Windows are handled similarly; see
476  * below.
477  */
478 static int
479 dentry_calculate_extraction_path(struct wim_dentry *dentry, void *_args)
480 {
481         struct apply_args *args = _args;
482         int ret;
483
484         dentry->needs_extraction = 1;
485
486         if (dentry == args->extract_root)
487                 return 0;
488
489         if (dentry_is_dot_or_dotdot(dentry)) {
490                 /* WIM files shouldn't contain . or .. entries.  But if they are
491                  * there, don't attempt to extract them. */
492                 WARNING("Skipping extraction of unexpected . or .. file \"%"TS"\"",
493                         dentry_full_path(dentry));
494                 goto skip_dentry;
495         }
496
497 #ifdef __WIN32__
498         struct wim_dentry *other;
499         list_for_each_entry(other, &dentry->case_insensitive_conflict_list,
500                             case_insensitive_conflict_list)
501         {
502                 if (other->needs_extraction) {
503                         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS)
504                         {
505                                 WARNING("\"%"TS"\" has the same case-insensitive "
506                                         "name as \"%"TS"\"; extracting dummy name instead",
507                                         dentry_full_path(dentry),
508                                         dentry_full_path(other));
509                                 goto out_replace;
510                         } else {
511                                 WARNING("Not extracting \"%"TS"\": has same case-insensitive "
512                                         "name as \"%"TS"\"",
513                                         dentry_full_path(dentry),
514                                         dentry_full_path(other));
515                                 goto skip_dentry;
516                         }
517                 }
518         }
519 #endif
520
521         if (file_name_valid(dentry->file_name, dentry->file_name_nbytes / 2, false)) {
522 #ifdef __WIN32__
523                 dentry->extraction_name = dentry->file_name;
524                 dentry->extraction_name_nchars = dentry->file_name_nbytes / 2;
525                 return 0;
526 #else
527                 return utf16le_to_tstr(dentry->file_name,
528                                        dentry->file_name_nbytes,
529                                        &dentry->extraction_name,
530                                        &dentry->extraction_name_nchars);
531 #endif
532         } else {
533                 if (args->extract_flags & WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES)
534                 {
535                         WARNING("\"%"TS"\" has an invalid filename "
536                                 "that is not supported on this platform; "
537                                 "extracting dummy name instead",
538                                 dentry_full_path(dentry));
539                         goto out_replace;
540                 } else {
541                         WARNING("Not extracting \"%"TS"\": has an invalid filename "
542                                 "that is not supported on this platform",
543                                 dentry_full_path(dentry));
544                         goto skip_dentry;
545                 }
546         }
547
548 out_replace:
549         {
550                 utf16lechar utf16_name_copy[dentry->file_name_nbytes / 2];
551
552                 memcpy(utf16_name_copy, dentry->file_name, dentry->file_name_nbytes);
553                 file_name_valid(utf16_name_copy, dentry->file_name_nbytes / 2, true);
554
555                 tchar *tchar_name;
556                 size_t tchar_nchars;
557         #ifdef __WIN32__
558                 tchar_name = utf16_name_copy;
559                 tchar_nchars = dentry->file_name_nbytes / 2;
560         #else
561                 ret = utf16le_to_tstr(utf16_name_copy,
562                                       dentry->file_name_nbytes,
563                                       &tchar_name, &tchar_nchars);
564                 if (ret)
565                         return ret;
566         #endif
567                 size_t fixed_name_num_chars = tchar_nchars;
568                 tchar fixed_name[tchar_nchars + 50];
569                 size_t extraction_name_nbytes;
570
571                 tmemcpy(fixed_name, tchar_name, tchar_nchars);
572                 fixed_name_num_chars += tsprintf(fixed_name + tchar_nchars,
573                                                  T(" (invalid filename #%lu)"),
574                                                  ++args->invalid_sequence);
575                 dentry->extraction_name = memdup(fixed_name, 2 * fixed_name_num_chars + 2);
576                 if (!dentry->extraction_name)
577                         return WIMLIB_ERR_NOMEM;
578                 dentry->extraction_name_nchars = fixed_name_num_chars;
579         }
580         return 0;
581 skip_dentry:
582         dentry->needs_extraction = 0;
583         dentry->not_extracted = 1;
584         return 0;
585 }
586
587 static int
588 dentry_reset_needs_extraction(struct wim_dentry *dentry, void *_ignore)
589 {
590         dentry->needs_extraction = 0;
591         dentry->not_extracted = 0;
592         dentry->is_win32_name = 0;
593         dentry->d_inode->i_visited = 0;
594         dentry->d_inode->i_dos_name_extracted = 0;
595         FREE(dentry->d_inode->i_extracted_file);
596         dentry->d_inode->i_extracted_file = NULL;
597         if ((void*)dentry->extraction_name != (void*)dentry->file_name)
598                 FREE(dentry->extraction_name);
599         dentry->extraction_name = NULL;
600         return 0;
601 }
602
/*
 * extract_tree - Extract a file or directory tree from the currently selected
 *                WIM image.
 *
 * @wim:        WIMStruct for the WIM file, with the desired image selected
 *              (as wim->current_image).
 * @wim_source_path:
 *              "Canonical" (i.e. no leading or trailing slashes, path
 *              separators forward slashes) path inside the WIM image to
 *              extract.  An empty string means the full image.
 * @target:
 *              Filesystem path to extract the file or directory tree to.
 *              When WIMLIB_EXTRACT_FLAG_NTFS is given (and NTFS-3g support is
 *              compiled in), it is instead passed to ntfs_mount() as the
 *              volume to extract to.
 *
 * @extract_flags:
 *              WIMLIB_EXTRACT_FLAG_*.  Also, the private flag
 *              WIMLIB_EXTRACT_FLAG_MULTI_IMAGE will be set if this is being
 *              called through wimlib_extract_image() with WIMLIB_ALL_IMAGES as
 *              the image.
 *
 * @progress_func:
 *              If non-NULL, progress function for the extraction.  The messages
 *              we may send in this function are:
 *
 *              WIMLIB_PROGRESS_MSG_EXTRACT_TREE_BEGIN or
 *                      WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN;
 *              WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_BEGIN;
 *              WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_END;
 *              WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY;
 *              WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS;
 *              WIMLIB_PROGRESS_MSG_APPLY_TIMESTAMPS;
 *              WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END or
 *                      WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END.
 *
 * The extraction proceeds in phases: compute each dentry's extraction name,
 * collect the streams to extract, create the directory structure and empty
 * files, extract the non-empty files stream by stream, and finally apply
 * timestamps.  With WIMLIB_EXTRACT_FLAG_TO_STDOUT the root dentry is instead
 * written to standard output and none of the later phases run.
 *
 * Returns 0 on success; nonzero on failure.
 */
static int
extract_tree(WIMStruct *wim, const tchar *wim_source_path, const tchar *target,
             int extract_flags, wimlib_progress_func_t progress_func)
{
        int ret;
        struct list_head stream_list;
        struct apply_args args;
        const struct apply_operations *ops;
        struct wim_dentry *root;

        /* Zeroing args also leaves args.target_realpath NULL, so the
         * unconditional FREE() in the cleanup path is safe. */
        memset(&args, 0, sizeof(args));

        args.w                      = wim;
        args.target                 = target;
        args.extract_flags          = extract_flags;
        args.progress_func          = progress_func;
        args.target_nchars          = tstrlen(target);

        /* The progress information is only filled in when somebody will
         * actually receive it. */
        if (progress_func) {
                args.progress.extract.wimfile_name = wim->filename;
                args.progress.extract.image = wim->current_image;
                args.progress.extract.extract_flags = (extract_flags &
                                                       WIMLIB_EXTRACT_MASK_PUBLIC);
                args.progress.extract.image_name = wimlib_get_image_name(wim,
                                                                         wim->current_image);
                args.progress.extract.extract_root_wim_source_path = wim_source_path;
                args.progress.extract.target = target;
        }

        /* Select the extraction backend; in NTFS mode the target volume is
         * mounted first. */
#ifdef WITH_NTFS_3G
        if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
                args.vol = ntfs_mount(target, 0);
                if (!args.vol) {
                        ERROR_WITH_ERRNO("Failed to mount NTFS volume `%"TS"'",
                                         target);
                        ret = WIMLIB_ERR_NTFS_3G;
                        goto out;
                }
                ops = &ntfs_apply_operations;
        } else
#endif
                ops = &normal_apply_operations;

        root = get_dentry(wim, wim_source_path);
        if (!root) {
                ERROR("Path \"%"TS"\" does not exist in WIM image %d",
                      wim_source_path, wim->current_image);
                ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
                goto out_ntfs_umount;
        }
        args.extract_root = root;

        /* Calculate the actual filename component of each extracted dentry, and
         * in the process set the dentry->needs_extraction flag on dentries that
         * will be extracted. */
        ret = for_dentry_in_tree(root, dentry_calculate_extraction_path, &args);
        if (ret)
                goto out_dentry_reset_needs_extraction;

        /* Build a list of the streams that need to be extracted */
        find_streams_for_extraction(root,
                                    &stream_list,
                                    wim->lookup_table, extract_flags);

        /* Calculate the number of bytes of data that will be extracted */
        calculate_bytes_to_extract(&stream_list, extract_flags,
                                   &args.progress);

        /* Extraction to standard output bypasses all remaining phases and
         * sends no progress messages. */
        if (extract_flags & WIMLIB_EXTRACT_FLAG_TO_STDOUT) {
                ret = extract_dentry_to_stdout(root);
                goto out_dentry_reset_needs_extraction;
        }

        /* An empty source path means the whole image is being extracted. */
        if (progress_func) {
                progress_func(*wim_source_path ? WIMLIB_PROGRESS_MSG_EXTRACT_TREE_BEGIN :
                              WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN,
                              &args.progress);
        }

        /* If a sequential extraction was specified, sort the streams to be
         * extracted by their position in the WIM file, so that the WIM file can
         * be read sequentially.
         *
         * A sorting failure is not fatal: ret is overwritten by the next
         * phase.  NOTE(review): only the local extract_flags is cleared here;
         * args.extract_flags, copied earlier, still has the SEQUENTIAL bit set
         * -- confirm that no backend relies on it. */
        if (extract_flags & WIMLIB_EXTRACT_FLAG_SEQUENTIAL) {
                ret = sort_stream_list_by_wim_position(&stream_list);
                if (ret != 0) {
                        WARNING("Falling back to non-sequential extraction");
                        extract_flags &= ~WIMLIB_EXTRACT_FLAG_SEQUENTIAL;
                }
        }

        if (progress_func) {
                progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_BEGIN,
                              &args.progress);
        }

        /* Make the directory structure and extract empty files.  The
         * NO_STREAMS flag is set only for the duration of this pass so that
         * maybe_apply_dentry() leaves files with an unnamed stream for the
         * stream extraction pass below. */
        args.extract_flags |= WIMLIB_EXTRACT_FLAG_NO_STREAMS;
        args.apply_dentry = ops->apply_dentry;
        ret = for_dentry_in_tree(root, maybe_apply_dentry, &args);
        args.extract_flags &= ~WIMLIB_EXTRACT_FLAG_NO_STREAMS;
        if (ret)
                goto out_dentry_reset_needs_extraction;

        if (progress_func) {
                progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_END,
                              &args.progress);
        }

        /* Resolve the target path once for the RPFIX flag, now that the
         * target exists.
         * NOTE(review): realpath() can fail for reasons other than memory
         * exhaustion, yet every failure is reported as WIMLIB_ERR_NOMEM. */
        if (extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX) {
                args.target_realpath = realpath(target, NULL);
                if (!args.target_realpath) {
                        ret = WIMLIB_ERR_NOMEM;
                        goto out_dentry_reset_needs_extraction;
                }
                args.target_realpath_len = tstrlen(args.target_realpath);
        }

        /* Extract non-empty files */
        ret = apply_stream_list(&stream_list, &args, ops, progress_func);
        if (ret)
                goto out_free_target_realpath;

        if (progress_func) {
                progress_func(WIMLIB_PROGRESS_MSG_APPLY_TIMESTAMPS,
                              &args.progress);
        }

        /* Apply timestamps */
        ret = for_dentry_in_tree_depth(root,
                                       ops->apply_dentry_timestamps, &args);
        if (ret)
                goto out_free_target_realpath;

        if (progress_func) {
                progress_func(*wim_source_path ? WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END :
                              WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END,
                              &args.progress);
        }
        /* Cleanup: the labels below are entered in reverse order of
         * acquisition; the success path falls through all of them. */
out_free_target_realpath:
        FREE(args.target_realpath);
out_dentry_reset_needs_extraction:
        /* Clear the per-extraction state (flags, extraction names) that was
         * left on the dentries and inodes of the tree. */
        for_dentry_in_tree(root, dentry_reset_needs_extraction, NULL);
out_ntfs_umount:
#ifdef WITH_NTFS_3G
        /* Unmount the NTFS volume */
        if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
                if (ntfs_umount(args.vol, FALSE) != 0) {
                        ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%"TS"'",
                                         args.target);
                        /* Do not mask an earlier extraction error. */
                        if (ret == 0)
                                ret = WIMLIB_ERR_NTFS_3G;
                }
        }
#endif
out:
        return ret;
}
795
796 /* Validates a single wimlib_extract_command, mostly checking to make sure the
797  * extract flags make sense. */
798 static int
799 check_extract_command(struct wimlib_extract_command *cmd, int wim_header_flags)
800 {
801         int extract_flags;
802         bool is_entire_image = (cmd->wim_source_path[0] == T('\0'));
803
804         /* Empty destination path? */
805         if (cmd->fs_dest_path[0] == T('\0'))
806                 return WIMLIB_ERR_INVALID_PARAM;
807
808         extract_flags = cmd->extract_flags;
809
810         /* Specified both symlink and hardlink modes? */
811         if ((extract_flags &
812              (WIMLIB_EXTRACT_FLAG_SYMLINK |
813               WIMLIB_EXTRACT_FLAG_HARDLINK)) == (WIMLIB_EXTRACT_FLAG_SYMLINK |
814                                                  WIMLIB_EXTRACT_FLAG_HARDLINK))
815                 return WIMLIB_ERR_INVALID_PARAM;
816
817 #ifdef __WIN32__
818         /* Wanted UNIX data on Windows? */
819         if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
820                 ERROR("Extracting UNIX data is not supported on Windows");
821                 return WIMLIB_ERR_INVALID_PARAM;
822         }
823         /* Wanted linked extraction on Windows?  (XXX This is possible, just not
824          * implemented yet.) */
825         if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
826                              WIMLIB_EXTRACT_FLAG_HARDLINK))
827         {
828                 ERROR("Linked extraction modes are not supported on Windows");
829                 return WIMLIB_ERR_INVALID_PARAM;
830         }
831 #endif
832
833         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
834                 /* NTFS-3g extraction mode requested */
835 #ifdef WITH_NTFS_3G
836                 if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
837                                       WIMLIB_EXTRACT_FLAG_HARDLINK))) {
838                         ERROR("Cannot specify symlink or hardlink flags when applying\n"
839                               "        directly to a NTFS volume");
840                         return WIMLIB_ERR_INVALID_PARAM;
841                 }
842                 if (!is_entire_image &&
843                     (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS))
844                 {
845                         ERROR("When applying directly to a NTFS volume you can "
846                               "only extract a full image, not part of one");
847                         return WIMLIB_ERR_INVALID_PARAM;
848                 }
849                 if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
850                         ERROR("Cannot restore UNIX-specific data in "
851                               "the NTFS extraction mode");
852                         return WIMLIB_ERR_INVALID_PARAM;
853                 }
854 #else
855                 ERROR("wimlib was compiled without support for NTFS-3g, so");
856                 ERROR("we cannot apply a WIM image directly to a NTFS volume");
857                 return WIMLIB_ERR_UNSUPPORTED;
858 #endif
859         }
860
861         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
862                               WIMLIB_EXTRACT_FLAG_NORPFIX)) ==
863                 (WIMLIB_EXTRACT_FLAG_RPFIX | WIMLIB_EXTRACT_FLAG_NORPFIX))
864         {
865                 ERROR("Cannot specify RPFIX and NORPFIX flags at the same time!");
866                 return WIMLIB_ERR_INVALID_PARAM;
867         }
868
869         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
870                               WIMLIB_EXTRACT_FLAG_NORPFIX)) == 0)
871         {
872                 /* Do reparse point fixups by default if the WIM header says
873                  * they are enabled and we are extracting a full image. */
874                 if ((wim_header_flags & WIM_HDR_FLAG_RP_FIX) && is_entire_image)
875                         extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX;
876         }
877
878         if (!is_entire_image && (extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX)) {
879                 ERROR("Cannot specify --rpfix when not extracting entire image");
880                 return WIMLIB_ERR_INVALID_PARAM;
881         }
882
883         cmd->extract_flags = extract_flags;
884         return 0;
885 }
886
887
888 /* Internal function to execute extraction commands for a WIM image. */
889 static int
890 do_wimlib_extract_files(WIMStruct *wim,
891                         int image,
892                         struct wimlib_extract_command *cmds,
893                         size_t num_cmds,
894                         wimlib_progress_func_t progress_func)
895 {
896         int ret;
897         bool found_link_cmd = false;
898         bool found_nolink_cmd = false;
899
900         /* Select the image from which we are extracting files */
901         ret = select_wim_image(wim, image);
902         if (ret)
903                 return ret;
904
905         /* Make sure there are no streams in the WIM that have not been
906          * checksummed yet. */
907         ret = wim_checksum_unhashed_streams(wim);
908         if (ret)
909                 return ret;
910
911         /* Check for problems with the extraction commands */
912         for (size_t i = 0; i < num_cmds; i++) {
913                 ret = check_extract_command(&cmds[i], wim->hdr.flags);
914                 if (ret)
915                         return ret;
916                 if (cmds[i].extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
917                                              WIMLIB_EXTRACT_FLAG_HARDLINK)) {
918                         found_link_cmd = true;
919                 } else {
920                         found_nolink_cmd = true;
921                 }
922                 if (found_link_cmd && found_nolink_cmd) {
923                         ERROR("Symlink or hardlink extraction mode must "
924                               "be set on all extraction commands");
925                         return WIMLIB_ERR_INVALID_PARAM;
926                 }
927         }
928
929         /* Execute the extraction commands */
930         for (size_t i = 0; i < num_cmds; i++) {
931                 ret = extract_tree(wim,
932                                    cmds[i].wim_source_path,
933                                    cmds[i].fs_dest_path,
934                                    cmds[i].extract_flags,
935                                    progress_func);
936                 if (ret)
937                         return ret;
938         }
939         return 0;
940 }
941
942 /* Extract files or directories from a WIM image. */
943 WIMLIBAPI int
944 wimlib_extract_files(WIMStruct *wim,
945                      int image,
946                      const struct wimlib_extract_command *cmds,
947                      size_t num_cmds,
948                      int default_extract_flags,
949                      WIMStruct **additional_swms,
950                      unsigned num_additional_swms,
951                      wimlib_progress_func_t progress_func)
952 {
953         int ret;
954         struct wimlib_extract_command *cmds_copy;
955         int all_flags = 0;
956
957         default_extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC;
958
959         ret = verify_swm_set(wim, additional_swms, num_additional_swms);
960         if (ret)
961                 goto out;
962
963         if (num_cmds == 0)
964                 goto out;
965
966         if (num_additional_swms)
967                 merge_lookup_tables(wim, additional_swms, num_additional_swms);
968
969         cmds_copy = CALLOC(num_cmds, sizeof(cmds[0]));
970         if (!cmds_copy) {
971                 ret = WIMLIB_ERR_NOMEM;
972                 goto out_restore_lookup_table;
973         }
974
975         for (size_t i = 0; i < num_cmds; i++) {
976                 cmds_copy[i].extract_flags = (default_extract_flags |
977                                                  cmds[i].extract_flags)
978                                                 & WIMLIB_EXTRACT_MASK_PUBLIC;
979                 all_flags |= cmds_copy[i].extract_flags;
980
981                 cmds_copy[i].wim_source_path = canonicalize_wim_path(cmds[i].wim_source_path);
982                 if (!cmds_copy[i].wim_source_path) {
983                         ret = WIMLIB_ERR_NOMEM;
984                         goto out_free_cmds_copy;
985                 }
986
987                 cmds_copy[i].fs_dest_path = canonicalize_fs_path(cmds[i].fs_dest_path);
988                 if (!cmds_copy[i].fs_dest_path) {
989                         ret = WIMLIB_ERR_NOMEM;
990                         goto out_free_cmds_copy;
991                 }
992
993         }
994         ret = do_wimlib_extract_files(wim, image,
995                                       cmds_copy, num_cmds,
996                                       progress_func);
997
998         if (all_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
999                          WIMLIB_EXTRACT_FLAG_HARDLINK))
1000         {
1001                 for_lookup_table_entry(wim->lookup_table,
1002                                        lte_free_extracted_file, NULL);
1003         }
1004 out_free_cmds_copy:
1005         for (size_t i = 0; i < num_cmds; i++) {
1006                 FREE(cmds_copy[i].wim_source_path);
1007                 FREE(cmds_copy[i].fs_dest_path);
1008         }
1009         FREE(cmds_copy);
1010 out_restore_lookup_table:
1011         if (num_additional_swms)
1012                 unmerge_lookup_table(wim);
1013 out:
1014         return ret;
1015 }
1016
1017 /*
1018  * Extracts an image from a WIM file.
1019  *
1020  * @wim:                WIMStruct for the WIM file.
1021  *
1022  * @image:              Number of the single image to extract.
1023  *
1024  * @target:             Directory or NTFS volume to extract the image to.
1025  *
1026  * @extract_flags:      Bitwise or of WIMLIB_EXTRACT_FLAG_*.
1027  *
1028  * @progress_func:      If non-NULL, a progress function to be called
1029  *                      periodically.
1030  *
1031  * Returns 0 on success; nonzero on failure.
1032  */
1033 static int
1034 extract_single_image(WIMStruct *wim, int image,
1035                      const tchar *target, int extract_flags,
1036                      wimlib_progress_func_t progress_func)
1037 {
1038         int ret;
1039         tchar *target_copy = canonicalize_fs_path(target);
1040         if (!target_copy)
1041                 return WIMLIB_ERR_NOMEM;
1042         struct wimlib_extract_command cmd = {
1043                 .wim_source_path = T(""),
1044                 .fs_dest_path = target_copy,
1045                 .extract_flags = extract_flags,
1046         };
1047         ret = do_wimlib_extract_files(wim, image, &cmd, 1, progress_func);
1048         FREE(target_copy);
1049         return ret;
1050 }
1051
1052 static const tchar * const filename_forbidden_chars =
1053 T(
1054 #ifdef __WIN32__
1055 "<>:\"/\\|?*"
1056 #else
1057 "/"
1058 #endif
1059 );
1060
1061 /* This function checks if it is okay to use a WIM image's name as a directory
1062  * name.  */
1063 static bool
1064 image_name_ok_as_dir(const tchar *image_name)
1065 {
1066         return image_name && *image_name &&
1067                 !tstrpbrk(image_name, filename_forbidden_chars) &&
1068                 tstrcmp(image_name, T(".")) &&
1069                 tstrcmp(image_name, T(".."));
1070 }
1071
1072 /* Extracts all images from the WIM to the directory @target, with the images
1073  * placed in subdirectories named by their image names. */
1074 static int
1075 extract_all_images(WIMStruct *wim,
1076                    const tchar *target,
1077                    int extract_flags,
1078                    wimlib_progress_func_t progress_func)
1079 {
1080         size_t image_name_max_len = max(xml_get_max_image_name_len(wim), 20);
1081         size_t output_path_len = tstrlen(target);
1082         tchar buf[output_path_len + 1 + image_name_max_len + 1];
1083         int ret;
1084         int image;
1085         const tchar *image_name;
1086         struct stat stbuf;
1087
1088         if (tstat(target, &stbuf)) {
1089                 if (errno == ENOENT)
1090                 {
1091                         if (tmkdir(target, S_IRWXU | S_IRGRP | S_IXGRP |
1092                                            S_IROTH | S_IXOTH))
1093                         {
1094                                 ERROR_WITH_ERRNO("Failed to create directory \"%"TS"\"", target);
1095                                 return WIMLIB_ERR_MKDIR;
1096                         }
1097                 } else {
1098                         ERROR_WITH_ERRNO("Failed to stat \"%"TS"\"", target);
1099                         return WIMLIB_ERR_STAT;
1100                 }
1101         } else if (!S_ISDIR(stbuf.st_mode)) {
1102                 ERROR("\"%"TS"\" is not a directory", target);
1103                 return WIMLIB_ERR_NOTDIR;
1104         }
1105
1106         tmemcpy(buf, target, output_path_len);
1107         buf[output_path_len] = T('/');
1108         for (image = 1; image <= wim->hdr.image_count; image++) {
1109                 image_name = wimlib_get_image_name(wim, image);
1110                 if (image_name_ok_as_dir(image_name)) {
1111                         tstrcpy(buf + output_path_len + 1, image_name);
1112                 } else {
1113                         /* Image name is empty or contains forbidden characters.
1114                          * Use image number instead. */
1115                         tsprintf(buf + output_path_len + 1, T("%d"), image);
1116                 }
1117                 ret = extract_single_image(wim, image, buf, extract_flags,
1118                                            progress_func);
1119                 if (ret)
1120                         return ret;
1121         }
1122         return 0;
1123 }
1124
1125 /* Extracts a single image or all images from a WIM file to a directory or NTFS
1126  * volume. */
1127 WIMLIBAPI int
1128 wimlib_extract_image(WIMStruct *wim,
1129                      int image,
1130                      const tchar *target,
1131                      int extract_flags,
1132                      WIMStruct **additional_swms,
1133                      unsigned num_additional_swms,
1134                      wimlib_progress_func_t progress_func)
1135 {
1136         int ret;
1137
1138         extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC;
1139
1140         ret = verify_swm_set(wim, additional_swms, num_additional_swms);
1141         if (ret)
1142                 return ret;
1143
1144         if (num_additional_swms)
1145                 merge_lookup_tables(wim, additional_swms, num_additional_swms);
1146
1147         if (image == WIMLIB_ALL_IMAGES) {
1148                 ret = extract_all_images(wim, target,
1149                                          extract_flags | WIMLIB_EXTRACT_FLAG_MULTI_IMAGE,
1150                                          progress_func);
1151         } else {
1152                 ret = extract_single_image(wim, image, target, extract_flags,
1153                                            progress_func);
1154         }
1155
1156         if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
1157                              WIMLIB_EXTRACT_FLAG_HARDLINK))
1158         {
1159                 for_lookup_table_entry(wim->lookup_table,
1160                                        lte_free_extracted_file,
1161                                        NULL);
1162         }
1163         if (num_additional_swms)
1164                 unmerge_lookup_table(wim);
1165         return ret;
1166 }