Cleanup and update NEWS
[wimlib] / src / extract.c
1 /*
2  * extract.c
3  *
4  * Support for extracting WIM images, or files or directories contained in a WIM
5  * image.
6  */
7
8 /*
9  * Copyright (C) 2012, 2013 Eric Biggers
10  *
11  * This file is part of wimlib, a library for working with WIM files.
12  *
13  * wimlib is free software; you can redistribute it and/or modify it under the
14  * terms of the GNU General Public License as published by the Free
15  * Software Foundation; either version 3 of the License, or (at your option)
16  * any later version.
17  *
18  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20  * A PARTICULAR PURPOSE. See the GNU General Public License for more
21  * details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with wimlib; if not, see http://www.gnu.org/licenses/.
25  */
26
27 #ifdef HAVE_CONFIG_H
28 #  include "config.h"
29 #endif
30
31 #include "wimlib/apply.h"
32 #include "wimlib/dentry.h"
33 #include "wimlib/encoding.h"
34 #include "wimlib/endianness.h"
35 #include "wimlib/error.h"
36 #include "wimlib/lookup_table.h"
37 #include "wimlib/paths.h"
38 #include "wimlib/resource.h"
39 #include "wimlib/swm.h"
40 #ifdef __WIN32__
41 #  include "wimlib/win32.h" /* for realpath() equivalent */
42 #endif
43 #include "wimlib/xml.h"
44
45 #include <errno.h>
46 #include <limits.h>
47 #ifdef WITH_NTFS_3G
48 #  include <ntfs-3g/volume.h> /* for ntfs_mount(), ntfs_umount() */
49 #endif
50 #include <stdlib.h>
51 #include <sys/stat.h>
52 #include <unistd.h>
53
54 static int
55 do_apply_op(struct wim_dentry *dentry, struct apply_args *args,
56             int (*apply_dentry_func)(const tchar *, size_t,
57                                      struct wim_dentry *, struct apply_args *))
58 {
59         tchar *p;
60         size_t extraction_path_nchars;
61         struct wim_dentry *d;
62         LIST_HEAD(ancestor_list);
63
64         extraction_path_nchars = args->target_nchars;
65
66         for (d = dentry; d != args->extract_root; d = d->parent) {
67                 if (d->not_extracted)
68                         return 0;
69                 extraction_path_nchars += d->extraction_name_nchars + 1;
70                 list_add(&d->tmp_list, &ancestor_list);
71         }
72
73         tchar extraction_path[extraction_path_nchars + 1];
74         p = tmempcpy(extraction_path, args->target, args->target_nchars);
75
76         list_for_each_entry(d, &ancestor_list, tmp_list) {
77                 *p++ = OS_PREFERRED_PATH_SEPARATOR;
78                 p = tmempcpy(p, d->extraction_name, d->extraction_name_nchars);
79         }
80         *p = T('\0');
81         return (*apply_dentry_func)(extraction_path, extraction_path_nchars,
82                                     dentry, args);
83 }
84
85
86 /* Extracts a file, directory, or symbolic link from the WIM archive. */
87 static int
88 apply_dentry_normal(struct wim_dentry *dentry, void *arg)
89 {
90 #ifdef __WIN32__
91         return do_apply_op(dentry, arg, win32_do_apply_dentry);
92 #else
93         return do_apply_op(dentry, arg, unix_do_apply_dentry);
94 #endif
95 }
96
97
98 /* Apply timestamps to an extracted file or directory */
99 static int
100 apply_dentry_timestamps_normal(struct wim_dentry *dentry, void *arg)
101 {
102 #ifdef __WIN32__
103         return do_apply_op(dentry, arg, win32_do_apply_dentry_timestamps);
104 #else
105         return do_apply_op(dentry, arg, unix_do_apply_dentry_timestamps);
106 #endif
107 }
108
109 static bool
110 dentry_is_dot_or_dotdot(const struct wim_dentry *dentry)
111 {
112         const utf16lechar *file_name = dentry->file_name;
113         return file_name != NULL &&
114                 file_name[0] == cpu_to_le16('.') &&
115                 (file_name[1] == cpu_to_le16('\0') ||
116                  (file_name[1] == cpu_to_le16('.') &&
117                   file_name[2] == cpu_to_le16('\0')));
118 }
119
120 /* Extract a dentry if it hasn't already been extracted and either
121  * WIMLIB_EXTRACT_FLAG_NO_STREAMS is not specified, or the dentry is a directory
122  * and/or has no unnamed stream. */
123 static int
124 maybe_apply_dentry(struct wim_dentry *dentry, void *arg)
125 {
126         struct apply_args *args = arg;
127         int ret;
128
129         if (!dentry->needs_extraction)
130                 return 0;
131
132         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_NO_STREAMS &&
133             !dentry_is_directory(dentry) &&
134             inode_unnamed_lte_resolved(dentry->d_inode) != NULL)
135                 return 0;
136
137         if ((args->extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) &&
138              args->progress_func) {
139                 ret = calculate_dentry_full_path(dentry);
140                 if (ret)
141                         return ret;
142                 args->progress.extract.cur_path = dentry->_full_path;
143                 args->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY,
144                                     &args->progress);
145         }
146         ret = args->apply_dentry(dentry, args);
147         if (ret == 0)
148                 dentry->needs_extraction = 0;
149         return ret;
150 }
151
152 static void
153 calculate_bytes_to_extract(struct list_head *stream_list,
154                            int extract_flags,
155                            union wimlib_progress_info *progress)
156 {
157         struct wim_lookup_table_entry *lte;
158         u64 total_bytes = 0;
159         u64 num_streams = 0;
160
161         /* For each stream to be extracted... */
162         list_for_each_entry(lte, stream_list, extraction_list) {
163                 if (extract_flags &
164                     (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
165                 {
166                         /* In the symlink or hard link extraction mode, each
167                          * stream will be extracted one time regardless of how
168                          * many dentries share the stream. */
169                         wimlib_assert(!(extract_flags & WIMLIB_EXTRACT_FLAG_NTFS));
170                         if (!lte->extracted_file) {
171                                 num_streams++;
172                                 total_bytes += wim_resource_size(lte);
173                         }
174                 } else {
175                         num_streams += lte->out_refcnt;
176                         total_bytes += lte->out_refcnt * wim_resource_size(lte);
177                 }
178         }
179         progress->extract.num_streams = num_streams;
180         progress->extract.total_bytes = total_bytes;
181         progress->extract.completed_bytes = 0;
182 }
183
184 static void
185 maybe_add_stream_for_extraction(struct wim_lookup_table_entry *lte,
186                                 struct list_head *stream_list)
187 {
188         if (++lte->out_refcnt == 1) {
189                 INIT_LIST_HEAD(&lte->lte_dentry_list);
190                 list_add_tail(&lte->extraction_list, stream_list);
191         }
192 }
193
/* Context passed through for_dentry_in_tree() to
 * dentry_find_streams_to_extract(). */
struct find_streams_ctx {
        /* Streams found so far, linked through their extraction_list member. */
        struct list_head stream_list;

        /* WIMLIB_EXTRACT_FLAG_* for the extraction; consulted to decide
         * whether named (alternate) data streams are included. */
        int extract_flags;
};
198
199 static int
200 dentry_find_streams_to_extract(struct wim_dentry *dentry, void *_ctx)
201 {
202         struct find_streams_ctx *ctx = _ctx;
203         struct wim_inode *inode = dentry->d_inode;
204         struct wim_lookup_table_entry *lte;
205         bool dentry_added = false;
206         struct list_head *stream_list = &ctx->stream_list;
207         int extract_flags = ctx->extract_flags;
208
209         if (!dentry->needs_extraction)
210                 return 0;
211
212         lte = inode_unnamed_lte_resolved(inode);
213         if (lte) {
214                 if (!inode->i_visited)
215                         maybe_add_stream_for_extraction(lte, stream_list);
216                 list_add_tail(&dentry->extraction_stream_list, &lte->lte_dentry_list);
217                 dentry_added = true;
218         }
219
220         /* Determine whether to include alternate data stream entries or not.
221          *
222          * UNIX:  Include them if extracting using NTFS-3g.
223          *
224          * Windows: Include them undconditionally, although if the filesystem is
225          * not NTFS we won't actually be able to extract them. */
226 #if defined(WITH_NTFS_3G)
227         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS)
228 #elif defined(__WIN32__)
229         if (1)
230 #else
231         if (0)
232 #endif
233         {
234                 for (unsigned i = 0; i < inode->i_num_ads; i++) {
235                         if (inode->i_ads_entries[i].stream_name_nbytes != 0) {
236                                 lte = inode->i_ads_entries[i].lte;
237                                 if (lte) {
238                                         if (!inode->i_visited) {
239                                                 maybe_add_stream_for_extraction(lte,
240                                                                                 stream_list);
241                                         }
242                                         if (!dentry_added) {
243                                                 list_add_tail(&dentry->extraction_stream_list,
244                                                               &lte->lte_dentry_list);
245                                                 dentry_added = true;
246                                         }
247                                 }
248                         }
249                 }
250         }
251         inode->i_visited = 1;
252         return 0;
253 }
254
255 static int
256 dentry_resolve_and_zero_lte_refcnt(struct wim_dentry *dentry, void *_lookup_table)
257 {
258         struct wim_inode *inode = dentry->d_inode;
259         struct wim_lookup_table *lookup_table = _lookup_table;
260         struct wim_lookup_table_entry *lte;
261
262         inode_resolve_ltes(inode, lookup_table);
263         for (unsigned i = 0; i <= inode->i_num_ads; i++) {
264                 lte = inode_stream_lte_resolved(inode, i);
265                 if (lte)
266                         lte->out_refcnt = 0;
267         }
268         return 0;
269 }
270
271 static void
272 find_streams_for_extraction(struct wim_dentry *root,
273                             struct list_head *stream_list,
274                             struct wim_lookup_table *lookup_table,
275                             int extract_flags)
276 {
277         struct find_streams_ctx ctx;
278
279         INIT_LIST_HEAD(&ctx.stream_list);
280         ctx.extract_flags = extract_flags;
281         for_dentry_in_tree(root, dentry_resolve_and_zero_lte_refcnt, lookup_table);
282         for_dentry_in_tree(root, dentry_find_streams_to_extract, &ctx);
283         list_transfer(&ctx.stream_list, stream_list);
284 }
285
/* Set of callbacks implementing one extraction backend.  Both callbacks have
 * the signature expected by the dentry tree walkers; @arg is the struct
 * apply_args for the extraction. */
struct apply_operations {
        /* Extract one dentry (file, directory, or link). */
        int (*apply_dentry)(struct wim_dentry *dentry, void *arg);

        /* Apply timestamps to one already-extracted dentry. */
        int (*apply_dentry_timestamps)(struct wim_dentry *dentry, void *arg);
};
290
/* Backend used for ordinary extraction through the native Win32 or UNIX
 * routines, i.e. whenever the NTFS-3g backend is not selected. */
static const struct apply_operations normal_apply_operations = {
        .apply_dentry = apply_dentry_normal,
        .apply_dentry_timestamps = apply_dentry_timestamps_normal,
};
295
#ifdef WITH_NTFS_3G
/* Backend used when WIMLIB_EXTRACT_FLAG_NTFS is given; only available when
 * built with NTFS-3g support. */
static const struct apply_operations ntfs_apply_operations = {
        .apply_dentry = apply_dentry_ntfs,
        .apply_dentry_timestamps = apply_dentry_timestamps_ntfs,
};
#endif
302
303 static int
304 apply_stream_list(struct list_head *stream_list,
305                   struct apply_args *args,
306                   const struct apply_operations *ops,
307                   wimlib_progress_func_t progress_func)
308 {
309         uint64_t bytes_per_progress = args->progress.extract.total_bytes / 100;
310         uint64_t next_progress = bytes_per_progress;
311         struct wim_lookup_table_entry *lte;
312         struct wim_dentry *dentry;
313         int ret;
314
315         /* This complicated loop is essentially looping through the dentries,
316          * although dentries may be visited more than once (if a dentry contains
317          * two different nonempty streams) or not at all (if a dentry contains
318          * no non-empty streams).
319          *
320          * The outer loop is over the distinct streams to be extracted so that
321          * sequential reading of the WIM can be implemented. */
322
323         /* For each distinct stream to be extracted */
324         list_for_each_entry(lte, stream_list, extraction_list) {
325                 /* For each dentry to be extracted that is a name for an inode
326                  * containing the stream */
327                 list_for_each_entry(dentry, &lte->lte_dentry_list, extraction_stream_list) {
328                         /* Extract the dentry if it was not already
329                          * extracted */
330                         ret = maybe_apply_dentry(dentry, args);
331                         if (ret)
332                                 return ret;
333                         if (progress_func &&
334                             args->progress.extract.completed_bytes >= next_progress)
335                         {
336                                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS,
337                                               &args->progress);
338                                 if (args->progress.extract.completed_bytes >=
339                                     args->progress.extract.total_bytes)
340                                 {
341                                         next_progress = ~0ULL;
342                                 } else {
343                                         next_progress =
344                                                 min (args->progress.extract.completed_bytes +
345                                                      bytes_per_progress,
346                                                      args->progress.extract.total_bytes);
347                                 }
348                         }
349                 }
350         }
351         return 0;
352 }
353
354 static int
355 sort_stream_list_by_wim_position(struct list_head *stream_list)
356 {
357         struct list_head *cur;
358         size_t num_streams;
359         struct wim_lookup_table_entry **array;
360         size_t i;
361         size_t array_size;
362
363         num_streams = 0;
364         list_for_each(cur, stream_list)
365                 num_streams++;
366         array_size = num_streams * sizeof(array[0]);
367         array = MALLOC(array_size);
368         if (!array) {
369                 ERROR("Failed to allocate %zu bytes to sort stream entries",
370                       array_size);
371                 return WIMLIB_ERR_NOMEM;
372         }
373         cur = stream_list->next;
374         for (i = 0; i < num_streams; i++) {
375                 array[i] = container_of(cur, struct wim_lookup_table_entry, extraction_list);
376                 cur = cur->next;
377         }
378
379         qsort(array, num_streams, sizeof(array[0]), cmp_streams_by_wim_position);
380
381         INIT_LIST_HEAD(stream_list);
382         for (i = 0; i < num_streams; i++)
383                 list_add_tail(&array[i]->extraction_list, stream_list);
384         FREE(array);
385         return 0;
386 }
387
388 /*
389  * Extract a dentry to standard output.
390  *
391  * This obviously doesn't make sense in all cases.  We return an error if the
392  * dentry does not correspond to a regular file.  Otherwise we extract the
393  * unnamed data stream only.
394  */
395 static int
396 extract_dentry_to_stdout(struct wim_dentry *dentry)
397 {
398         int ret = 0;
399         if (!dentry_is_regular_file(dentry)) {
400                 ERROR("\"%"TS"\" is not a regular file and therefore cannot be "
401                       "extracted to standard output", dentry->_full_path);
402                 ret = WIMLIB_ERR_NOT_A_REGULAR_FILE;
403         } else {
404                 struct wim_lookup_table_entry *lte;
405
406                 lte = inode_unnamed_lte_resolved(dentry->d_inode);
407                 if (lte) {
408                         ret = extract_wim_resource_to_fd(lte, STDOUT_FILENO,
409                                                          wim_resource_size(lte));
410                 }
411         }
412         return ret;
413 }
414
/* Character substituted by file_name_valid() for characters that are not
 * allowed in file names when it is asked to fix a name.  On Windows this is
 * 0xfffd, since '?' is itself one of the characters rejected there; elsewhere
 * a plain '?' is used. */
#ifdef __WIN32__
static const utf16lechar replacement_char = cpu_to_le16(0xfffd);
#else
static const utf16lechar replacement_char = cpu_to_le16('?');
#endif
420
421 static bool
422 file_name_valid(utf16lechar *name, size_t num_chars, bool fix)
423 {
424         size_t i;
425
426         if (num_chars == 0)
427                 return true;
428         for (i = 0; i < num_chars; i++) {
429                 switch (name[i]) {
430         #ifdef __WIN32__
431                 case cpu_to_le16('\\'):
432                 case cpu_to_le16(':'):
433                 case cpu_to_le16('*'):
434                 case cpu_to_le16('?'):
435                 case cpu_to_le16('"'):
436                 case cpu_to_le16('<'):
437                 case cpu_to_le16('>'):
438                 case cpu_to_le16('|'):
439         #endif
440                 case cpu_to_le16('/'):
441                 case cpu_to_le16('\0'):
442                         if (fix)
443                                 name[i] = replacement_char;
444                         else
445                                 return false;
446                 }
447         }
448
449 #ifdef __WIN32__
450         if (name[num_chars - 1] == cpu_to_le16(' ') ||
451             name[num_chars - 1] == cpu_to_le16('.'))
452         {
453                 if (fix)
454                         name[num_chars - 1] = replacement_char;
455                 else
456                         return false;
457         }
458 #endif
459         return true;
460 }
461
462 /*
463  * dentry_calculate_extraction_path-
464  *
465  * Calculate the actual filename component at which a WIM dentry will be
466  * extracted, handling invalid filenames "properly".
467  *
468  * dentry->extraction_name usually will be set the same as dentry->file_name (on
469  * UNIX, converted into the platform's multibyte encoding).  However, if the
470  * file name contains characters that are not valid on the current platform or
471  * has some other format that is not valid, leave dentry->extraction_name as
472  * NULL and clear dentry->needs_extraction to indicate that this dentry should
473  * not be extracted, unless the appropriate flag
474  * WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES is set in the extract flags, in
475  * which case a substitute filename will be created and set instead.
476  *
477  * Conflicts with case-insensitive names on Windows are handled similarly; see
478  * below.
479  */
480 static int
481 dentry_calculate_extraction_path(struct wim_dentry *dentry, void *_args)
482 {
483         struct apply_args *args = _args;
484         int ret;
485
486         dentry->needs_extraction = 1;
487
488         if (dentry == args->extract_root)
489                 return 0;
490
491         if (dentry_is_dot_or_dotdot(dentry)) {
492                 /* WIM files shouldn't contain . or .. entries.  But if they are
493                  * there, don't attempt to extract them. */
494                 WARNING("Skipping extraction of unexpected . or .. file \"%"TS"\"",
495                         dentry_full_path(dentry));
496                 goto skip_dentry;
497         }
498
499 #ifdef __WIN32__
500         struct wim_dentry *other;
501         list_for_each_entry(other, &dentry->case_insensitive_conflict_list,
502                             case_insensitive_conflict_list)
503         {
504                 if (other->needs_extraction) {
505                         if (args->extract_flags & WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS)
506                         {
507                                 WARNING("\"%"TS"\" has the same case-insensitive "
508                                         "name as \"%"TS"\"; extracting dummy name instead",
509                                         dentry_full_path(dentry),
510                                         dentry_full_path(other));
511                                 goto out_replace;
512                         } else {
513                                 WARNING("Not extracting \"%"TS"\": has same case-insensitive "
514                                         "name as \"%"TS"\"",
515                                         dentry_full_path(dentry),
516                                         dentry_full_path(other));
517                                 goto skip_dentry;
518                         }
519                 }
520         }
521 #endif
522
523         if (file_name_valid(dentry->file_name, dentry->file_name_nbytes / 2, false)) {
524 #ifdef __WIN32__
525                 dentry->extraction_name = dentry->file_name;
526                 dentry->extraction_name_nchars = dentry->file_name_nbytes / 2;
527                 return 0;
528 #else
529                 return utf16le_to_tstr(dentry->file_name,
530                                        dentry->file_name_nbytes,
531                                        &dentry->extraction_name,
532                                        &dentry->extraction_name_nchars);
533 #endif
534         } else {
535                 if (args->extract_flags & WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES)
536                 {
537                         WARNING("\"%"TS"\" has an invalid filename "
538                                 "that is not supported on this platform; "
539                                 "extracting dummy name instead",
540                                 dentry_full_path(dentry));
541                         goto out_replace;
542                 } else {
543                         WARNING("Not extracting \"%"TS"\": has an invalid filename "
544                                 "that is not supported on this platform",
545                                 dentry_full_path(dentry));
546                         goto skip_dentry;
547                 }
548         }
549
550 out_replace:
551         {
552                 utf16lechar utf16_name_copy[dentry->file_name_nbytes / 2];
553
554                 memcpy(utf16_name_copy, dentry->file_name, dentry->file_name_nbytes);
555                 file_name_valid(utf16_name_copy, dentry->file_name_nbytes / 2, true);
556
557                 tchar *tchar_name;
558                 size_t tchar_nchars;
559         #ifdef __WIN32__
560                 tchar_name = utf16_name_copy;
561                 tchar_nchars = dentry->file_name_nbytes / 2;
562         #else
563                 ret = utf16le_to_tstr(utf16_name_copy,
564                                       dentry->file_name_nbytes,
565                                       &tchar_name, &tchar_nchars);
566                 if (ret)
567                         return ret;
568         #endif
569                 size_t fixed_name_num_chars = tchar_nchars;
570                 tchar fixed_name[tchar_nchars + 50];
571
572                 tmemcpy(fixed_name, tchar_name, tchar_nchars);
573                 fixed_name_num_chars += tsprintf(fixed_name + tchar_nchars,
574                                                  T(" (invalid filename #%lu)"),
575                                                  ++args->invalid_sequence);
576         #ifndef __WIN32__
577                 FREE(tchar_name);
578         #endif
579                 dentry->extraction_name = memdup(fixed_name, 2 * fixed_name_num_chars + 2);
580                 if (!dentry->extraction_name)
581                         return WIMLIB_ERR_NOMEM;
582                 dentry->extraction_name_nchars = fixed_name_num_chars;
583         }
584         return 0;
585 skip_dentry:
586         dentry->needs_extraction = 0;
587         dentry->not_extracted = 1;
588         return 0;
589 }
590
591 static int
592 dentry_reset_needs_extraction(struct wim_dentry *dentry, void *_ignore)
593 {
594         dentry->needs_extraction = 0;
595         dentry->not_extracted = 0;
596         dentry->is_win32_name = 0;
597         dentry->d_inode->i_visited = 0;
598         dentry->d_inode->i_dos_name_extracted = 0;
599         FREE(dentry->d_inode->i_extracted_file);
600         dentry->d_inode->i_extracted_file = NULL;
601         if ((void*)dentry->extraction_name != (void*)dentry->file_name)
602                 FREE(dentry->extraction_name);
603         dentry->extraction_name = NULL;
604         return 0;
605 }
606
607 /*
608  * extract_tree - Extract a file or directory tree from the currently selected
609  *                WIM image.
610  *
611  * @wim:        WIMStruct for the WIM file, with the desired image selected
612  *              (as wim->current_image).
613  * @wim_source_path:
614  *              "Canonical" (i.e. no leading or trailing slashes, path
615  *              separators forwald slashes) path inside the WIM image to
616  *              extract.  An empty string means the full image.
617  * @target:
618  *              Filesystem path to extract the file or directory tree to.
619  *
620  * @extract_flags:
621  *              WIMLIB_EXTRACT_FLAG_*.  Also, the private flag
622  *              WIMLIB_EXTRACT_FLAG_MULTI_IMAGE will be set if this is being
623  *              called through wimlib_extract_image() with WIMLIB_ALL_IMAGES as
624  *              the image.
625  *
626  * @progress_func:
627  *              If non-NULL, progress function for the extraction.  The messages
628  *              we may in this function are:
629  *
630  *              WIMLIB_PROGRESS_MSG_EXTRACT_TREE_BEGIN or
631  *                      WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN;
632  *              WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_BEGIN;
633  *              WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_END;
634  *              WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY;
635  *              WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS;
636  *              WIMLIB_PROGRESS_MSG_APPLY_TIMESTAMPS;
637  *              WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END or
638  *                      WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END.
639  *
640  * Returns 0 on success; nonzero on failure.
641  */
642 static int
643 extract_tree(WIMStruct *wim, const tchar *wim_source_path, const tchar *target,
644              int extract_flags, wimlib_progress_func_t progress_func)
645 {
646         int ret;
647         struct list_head stream_list;
648         struct apply_args args;
649         const struct apply_operations *ops;
650         struct wim_dentry *root;
651
652         memset(&args, 0, sizeof(args));
653
654         args.w                      = wim;
655         args.target                 = target;
656         args.extract_flags          = extract_flags;
657         args.progress_func          = progress_func;
658         args.target_nchars          = tstrlen(target);
659
660         if (progress_func) {
661                 args.progress.extract.wimfile_name = wim->filename;
662                 args.progress.extract.image = wim->current_image;
663                 args.progress.extract.extract_flags = (extract_flags &
664                                                        WIMLIB_EXTRACT_MASK_PUBLIC);
665                 args.progress.extract.image_name = wimlib_get_image_name(wim,
666                                                                          wim->current_image);
667                 args.progress.extract.extract_root_wim_source_path = wim_source_path;
668                 args.progress.extract.target = target;
669         }
670
671 #ifdef WITH_NTFS_3G
672         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
673                 args.vol = ntfs_mount(target, 0);
674                 if (!args.vol) {
675                         ERROR_WITH_ERRNO("Failed to mount NTFS volume `%"TS"'",
676                                          target);
677                         ret = WIMLIB_ERR_NTFS_3G;
678                         goto out;
679                 }
680                 ops = &ntfs_apply_operations;
681         } else
682 #endif
683                 ops = &normal_apply_operations;
684
685         root = get_dentry(wim, wim_source_path);
686         if (!root) {
687                 ERROR("Path \"%"TS"\" does not exist in WIM image %d",
688                       wim_source_path, wim->current_image);
689                 ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
690                 goto out_ntfs_umount;
691         }
692         args.extract_root = root;
693
694         /* Calculate the actual filename component of each extracted dentry, and
695          * in the process set the dentry->needs_extraction flag on dentries that
696          * will be extracted. */
697         ret = for_dentry_in_tree(root, dentry_calculate_extraction_path, &args);
698         if (ret)
699                 goto out_dentry_reset_needs_extraction;
700
701         /* Build a list of the streams that need to be extracted */
702         find_streams_for_extraction(root,
703                                     &stream_list,
704                                     wim->lookup_table, extract_flags);
705
706         /* Calculate the number of bytes of data that will be extracted */
707         calculate_bytes_to_extract(&stream_list, extract_flags,
708                                    &args.progress);
709
710         if (extract_flags & WIMLIB_EXTRACT_FLAG_TO_STDOUT) {
711                 ret = extract_dentry_to_stdout(root);
712                 goto out_dentry_reset_needs_extraction;
713         }
714
715         if (progress_func) {
716                 progress_func(*wim_source_path ? WIMLIB_PROGRESS_MSG_EXTRACT_TREE_BEGIN :
717                               WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN,
718                               &args.progress);
719         }
720
721         /* If a sequential extraction was specified, sort the streams to be
722          * extracted by their position in the WIM file, so that the WIM file can
723          * be read sequentially. */
724         if (extract_flags & WIMLIB_EXTRACT_FLAG_SEQUENTIAL) {
725                 ret = sort_stream_list_by_wim_position(&stream_list);
726                 if (ret != 0) {
727                         WARNING("Falling back to non-sequential extraction");
728                         extract_flags &= ~WIMLIB_EXTRACT_FLAG_SEQUENTIAL;
729                 }
730         }
731
732         if (progress_func) {
733                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_BEGIN,
734                               &args.progress);
735         }
736
737         /* Make the directory structure and extract empty files */
738         args.extract_flags |= WIMLIB_EXTRACT_FLAG_NO_STREAMS;
739         args.apply_dentry = ops->apply_dentry;
740         ret = for_dentry_in_tree(root, maybe_apply_dentry, &args);
741         args.extract_flags &= ~WIMLIB_EXTRACT_FLAG_NO_STREAMS;
742         if (ret)
743                 goto out_dentry_reset_needs_extraction;
744
745         if (progress_func) {
746                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_END,
747                               &args.progress);
748         }
749
750         if (extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX) {
751                 args.target_realpath = realpath(target, NULL);
752                 if (!args.target_realpath) {
753                         ret = WIMLIB_ERR_NOMEM;
754                         goto out_dentry_reset_needs_extraction;
755                 }
756                 args.target_realpath_len = tstrlen(args.target_realpath);
757         }
758
759         /* Extract non-empty files */
760         ret = apply_stream_list(&stream_list, &args, ops, progress_func);
761         if (ret)
762                 goto out_free_target_realpath;
763
764         if (progress_func) {
765                 progress_func(WIMLIB_PROGRESS_MSG_APPLY_TIMESTAMPS,
766                               &args.progress);
767         }
768
769         /* Apply timestamps */
770         ret = for_dentry_in_tree_depth(root,
771                                        ops->apply_dentry_timestamps, &args);
772         if (ret)
773                 goto out_free_target_realpath;
774
775         if (progress_func) {
776                 progress_func(*wim_source_path ? WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END :
777                               WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END,
778                               &args.progress);
779         }
780 out_free_target_realpath:
781         FREE(args.target_realpath);
782 out_dentry_reset_needs_extraction:
783         for_dentry_in_tree(root, dentry_reset_needs_extraction, NULL);
784 out_ntfs_umount:
785 #ifdef WITH_NTFS_3G
786         /* Unmount the NTFS volume */
787         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
788                 if (ntfs_umount(args.vol, FALSE) != 0) {
789                         ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%"TS"'",
790                                          args.target);
791                         if (ret == 0)
792                                 ret = WIMLIB_ERR_NTFS_3G;
793                 }
794         }
795 #endif
796 out:
797         return ret;
798 }
799
800 /* Validates a single wimlib_extract_command, mostly checking to make sure the
801  * extract flags make sense. */
802 static int
803 check_extract_command(struct wimlib_extract_command *cmd, int wim_header_flags)
804 {
805         int extract_flags;
806         bool is_entire_image = (cmd->wim_source_path[0] == T('\0'));
807
808         /* Empty destination path? */
809         if (cmd->fs_dest_path[0] == T('\0'))
810                 return WIMLIB_ERR_INVALID_PARAM;
811
812         extract_flags = cmd->extract_flags;
813
814         /* Specified both symlink and hardlink modes? */
815         if ((extract_flags &
816              (WIMLIB_EXTRACT_FLAG_SYMLINK |
817               WIMLIB_EXTRACT_FLAG_HARDLINK)) == (WIMLIB_EXTRACT_FLAG_SYMLINK |
818                                                  WIMLIB_EXTRACT_FLAG_HARDLINK))
819                 return WIMLIB_ERR_INVALID_PARAM;
820
821 #ifdef __WIN32__
822         /* Wanted UNIX data on Windows? */
823         if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
824                 ERROR("Extracting UNIX data is not supported on Windows");
825                 return WIMLIB_ERR_INVALID_PARAM;
826         }
827         /* Wanted linked extraction on Windows?  (XXX This is possible, just not
828          * implemented yet.) */
829         if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
830                              WIMLIB_EXTRACT_FLAG_HARDLINK))
831         {
832                 ERROR("Linked extraction modes are not supported on Windows");
833                 return WIMLIB_ERR_INVALID_PARAM;
834         }
835 #endif
836
837         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
838                 /* NTFS-3g extraction mode requested */
839 #ifdef WITH_NTFS_3G
840                 if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
841                                       WIMLIB_EXTRACT_FLAG_HARDLINK))) {
842                         ERROR("Cannot specify symlink or hardlink flags when applying\n"
843                               "        directly to a NTFS volume");
844                         return WIMLIB_ERR_INVALID_PARAM;
845                 }
846                 if (!is_entire_image &&
847                     (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS))
848                 {
849                         ERROR("When applying directly to a NTFS volume you can "
850                               "only extract a full image, not part of one");
851                         return WIMLIB_ERR_INVALID_PARAM;
852                 }
853                 if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
854                         ERROR("Cannot restore UNIX-specific data in "
855                               "the NTFS extraction mode");
856                         return WIMLIB_ERR_INVALID_PARAM;
857                 }
858 #else
859                 ERROR("wimlib was compiled without support for NTFS-3g, so");
860                 ERROR("we cannot apply a WIM image directly to a NTFS volume");
861                 return WIMLIB_ERR_UNSUPPORTED;
862 #endif
863         }
864
865         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
866                               WIMLIB_EXTRACT_FLAG_NORPFIX)) ==
867                 (WIMLIB_EXTRACT_FLAG_RPFIX | WIMLIB_EXTRACT_FLAG_NORPFIX))
868         {
869                 ERROR("Cannot specify RPFIX and NORPFIX flags at the same time!");
870                 return WIMLIB_ERR_INVALID_PARAM;
871         }
872
873         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
874                               WIMLIB_EXTRACT_FLAG_NORPFIX)) == 0)
875         {
876                 /* Do reparse point fixups by default if the WIM header says
877                  * they are enabled and we are extracting a full image. */
878                 if ((wim_header_flags & WIM_HDR_FLAG_RP_FIX) && is_entire_image)
879                         extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX;
880         }
881
882         if (!is_entire_image && (extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX)) {
883                 ERROR("Cannot specify --rpfix when not extracting entire image");
884                 return WIMLIB_ERR_INVALID_PARAM;
885         }
886
887         cmd->extract_flags = extract_flags;
888         return 0;
889 }
890
891
892 /* Internal function to execute extraction commands for a WIM image. */
893 static int
894 do_wimlib_extract_files(WIMStruct *wim,
895                         int image,
896                         struct wimlib_extract_command *cmds,
897                         size_t num_cmds,
898                         wimlib_progress_func_t progress_func)
899 {
900         int ret;
901         bool found_link_cmd = false;
902         bool found_nolink_cmd = false;
903
904         /* Select the image from which we are extracting files */
905         ret = select_wim_image(wim, image);
906         if (ret)
907                 return ret;
908
909         /* Make sure there are no streams in the WIM that have not been
910          * checksummed yet. */
911         ret = wim_checksum_unhashed_streams(wim);
912         if (ret)
913                 return ret;
914
915         /* Check for problems with the extraction commands */
916         for (size_t i = 0; i < num_cmds; i++) {
917                 ret = check_extract_command(&cmds[i], wim->hdr.flags);
918                 if (ret)
919                         return ret;
920                 if (cmds[i].extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
921                                              WIMLIB_EXTRACT_FLAG_HARDLINK)) {
922                         found_link_cmd = true;
923                 } else {
924                         found_nolink_cmd = true;
925                 }
926                 if (found_link_cmd && found_nolink_cmd) {
927                         ERROR("Symlink or hardlink extraction mode must "
928                               "be set on all extraction commands");
929                         return WIMLIB_ERR_INVALID_PARAM;
930                 }
931         }
932
933         /* Execute the extraction commands */
934         for (size_t i = 0; i < num_cmds; i++) {
935                 ret = extract_tree(wim,
936                                    cmds[i].wim_source_path,
937                                    cmds[i].fs_dest_path,
938                                    cmds[i].extract_flags,
939                                    progress_func);
940                 if (ret)
941                         return ret;
942         }
943         return 0;
944 }
945
946 /* Extract files or directories from a WIM image. */
947 WIMLIBAPI int
948 wimlib_extract_files(WIMStruct *wim,
949                      int image,
950                      const struct wimlib_extract_command *cmds,
951                      size_t num_cmds,
952                      int default_extract_flags,
953                      WIMStruct **additional_swms,
954                      unsigned num_additional_swms,
955                      wimlib_progress_func_t progress_func)
956 {
957         int ret;
958         struct wimlib_extract_command *cmds_copy;
959         int all_flags = 0;
960
961         default_extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC;
962
963         ret = verify_swm_set(wim, additional_swms, num_additional_swms);
964         if (ret)
965                 goto out;
966
967         if (num_cmds == 0)
968                 goto out;
969
970         if (num_additional_swms)
971                 merge_lookup_tables(wim, additional_swms, num_additional_swms);
972
973         cmds_copy = CALLOC(num_cmds, sizeof(cmds[0]));
974         if (!cmds_copy) {
975                 ret = WIMLIB_ERR_NOMEM;
976                 goto out_restore_lookup_table;
977         }
978
979         for (size_t i = 0; i < num_cmds; i++) {
980                 cmds_copy[i].extract_flags = (default_extract_flags |
981                                                  cmds[i].extract_flags)
982                                                 & WIMLIB_EXTRACT_MASK_PUBLIC;
983                 all_flags |= cmds_copy[i].extract_flags;
984
985                 cmds_copy[i].wim_source_path = canonicalize_wim_path(cmds[i].wim_source_path);
986                 if (!cmds_copy[i].wim_source_path) {
987                         ret = WIMLIB_ERR_NOMEM;
988                         goto out_free_cmds_copy;
989                 }
990
991                 cmds_copy[i].fs_dest_path = canonicalize_fs_path(cmds[i].fs_dest_path);
992                 if (!cmds_copy[i].fs_dest_path) {
993                         ret = WIMLIB_ERR_NOMEM;
994                         goto out_free_cmds_copy;
995                 }
996
997         }
998         ret = do_wimlib_extract_files(wim, image,
999                                       cmds_copy, num_cmds,
1000                                       progress_func);
1001
1002         if (all_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
1003                          WIMLIB_EXTRACT_FLAG_HARDLINK))
1004         {
1005                 for_lookup_table_entry(wim->lookup_table,
1006                                        lte_free_extracted_file, NULL);
1007         }
1008 out_free_cmds_copy:
1009         for (size_t i = 0; i < num_cmds; i++) {
1010                 FREE(cmds_copy[i].wim_source_path);
1011                 FREE(cmds_copy[i].fs_dest_path);
1012         }
1013         FREE(cmds_copy);
1014 out_restore_lookup_table:
1015         if (num_additional_swms)
1016                 unmerge_lookup_table(wim);
1017 out:
1018         return ret;
1019 }
1020
1021 /*
1022  * Extracts an image from a WIM file.
1023  *
1024  * @wim:                WIMStruct for the WIM file.
1025  *
1026  * @image:              Number of the single image to extract.
1027  *
1028  * @target:             Directory or NTFS volume to extract the image to.
1029  *
1030  * @extract_flags:      Bitwise or of WIMLIB_EXTRACT_FLAG_*.
1031  *
1032  * @progress_func:      If non-NULL, a progress function to be called
1033  *                      periodically.
1034  *
1035  * Returns 0 on success; nonzero on failure.
1036  */
1037 static int
1038 extract_single_image(WIMStruct *wim, int image,
1039                      const tchar *target, int extract_flags,
1040                      wimlib_progress_func_t progress_func)
1041 {
1042         int ret;
1043         tchar *target_copy = canonicalize_fs_path(target);
1044         if (!target_copy)
1045                 return WIMLIB_ERR_NOMEM;
1046         struct wimlib_extract_command cmd = {
1047                 .wim_source_path = T(""),
1048                 .fs_dest_path = target_copy,
1049                 .extract_flags = extract_flags,
1050         };
1051         ret = do_wimlib_extract_files(wim, image, &cmd, 1, progress_func);
1052         FREE(target_copy);
1053         return ret;
1054 }
1055
1056 static const tchar * const filename_forbidden_chars =
1057 T(
1058 #ifdef __WIN32__
1059 "<>:\"/\\|?*"
1060 #else
1061 "/"
1062 #endif
1063 );
1064
1065 /* This function checks if it is okay to use a WIM image's name as a directory
1066  * name.  */
1067 static bool
1068 image_name_ok_as_dir(const tchar *image_name)
1069 {
1070         return image_name && *image_name &&
1071                 !tstrpbrk(image_name, filename_forbidden_chars) &&
1072                 tstrcmp(image_name, T(".")) &&
1073                 tstrcmp(image_name, T(".."));
1074 }
1075
1076 /* Extracts all images from the WIM to the directory @target, with the images
1077  * placed in subdirectories named by their image names. */
1078 static int
1079 extract_all_images(WIMStruct *wim,
1080                    const tchar *target,
1081                    int extract_flags,
1082                    wimlib_progress_func_t progress_func)
1083 {
1084         size_t image_name_max_len = max(xml_get_max_image_name_len(wim), 20);
1085         size_t output_path_len = tstrlen(target);
1086         tchar buf[output_path_len + 1 + image_name_max_len + 1];
1087         int ret;
1088         int image;
1089         const tchar *image_name;
1090         struct stat stbuf;
1091
1092         if (tstat(target, &stbuf)) {
1093                 if (errno == ENOENT)
1094                 {
1095                         if (tmkdir(target, S_IRWXU | S_IRGRP | S_IXGRP |
1096                                            S_IROTH | S_IXOTH))
1097                         {
1098                                 ERROR_WITH_ERRNO("Failed to create directory \"%"TS"\"", target);
1099                                 return WIMLIB_ERR_MKDIR;
1100                         }
1101                 } else {
1102                         ERROR_WITH_ERRNO("Failed to stat \"%"TS"\"", target);
1103                         return WIMLIB_ERR_STAT;
1104                 }
1105         } else if (!S_ISDIR(stbuf.st_mode)) {
1106                 ERROR("\"%"TS"\" is not a directory", target);
1107                 return WIMLIB_ERR_NOTDIR;
1108         }
1109
1110         tmemcpy(buf, target, output_path_len);
1111         buf[output_path_len] = T('/');
1112         for (image = 1; image <= wim->hdr.image_count; image++) {
1113                 image_name = wimlib_get_image_name(wim, image);
1114                 if (image_name_ok_as_dir(image_name)) {
1115                         tstrcpy(buf + output_path_len + 1, image_name);
1116                 } else {
1117                         /* Image name is empty or contains forbidden characters.
1118                          * Use image number instead. */
1119                         tsprintf(buf + output_path_len + 1, T("%d"), image);
1120                 }
1121                 ret = extract_single_image(wim, image, buf, extract_flags,
1122                                            progress_func);
1123                 if (ret)
1124                         return ret;
1125         }
1126         return 0;
1127 }
1128
1129 /* Extracts a single image or all images from a WIM file to a directory or NTFS
1130  * volume. */
1131 WIMLIBAPI int
1132 wimlib_extract_image(WIMStruct *wim,
1133                      int image,
1134                      const tchar *target,
1135                      int extract_flags,
1136                      WIMStruct **additional_swms,
1137                      unsigned num_additional_swms,
1138                      wimlib_progress_func_t progress_func)
1139 {
1140         int ret;
1141
1142         extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC;
1143
1144         ret = verify_swm_set(wim, additional_swms, num_additional_swms);
1145         if (ret)
1146                 return ret;
1147
1148         if (num_additional_swms)
1149                 merge_lookup_tables(wim, additional_swms, num_additional_swms);
1150
1151         if (image == WIMLIB_ALL_IMAGES) {
1152                 ret = extract_all_images(wim, target,
1153                                          extract_flags | WIMLIB_EXTRACT_FLAG_MULTI_IMAGE,
1154                                          progress_func);
1155         } else {
1156                 ret = extract_single_image(wim, image, target, extract_flags,
1157                                            progress_func);
1158         }
1159
1160         if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK |
1161                              WIMLIB_EXTRACT_FLAG_HARDLINK))
1162         {
1163                 for_lookup_table_entry(wim->lookup_table,
1164                                        lte_free_extracted_file,
1165                                        NULL);
1166         }
1167         if (num_additional_swms)
1168                 unmerge_lookup_table(wim);
1169         return ret;
1170 }