Merge branch 'new_extract'
[wimlib] / src / extract.c
1 /*
2  * extract.c
3  *
4  * Support for extracting WIM images, or files or directories contained in a WIM
5  * image.
6  */
7
8 /*
9  * Copyright (C) 2012, 2013, 2014 Eric Biggers
10  *
11  * This file is part of wimlib, a library for working with WIM files.
12  *
13  * wimlib is free software; you can redistribute it and/or modify it under the
14  * terms of the GNU General Public License as published by the Free
15  * Software Foundation; either version 3 of the License, or (at your option)
16  * any later version.
17  *
18  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20  * A PARTICULAR PURPOSE. See the GNU General Public License for more
21  * details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with wimlib; if not, see http://www.gnu.org/licenses/.
25  */
26
27 /*
28  * This file provides the API functions wimlib_extract_image(),
29  * wimlib_extract_image_from_pipe(), wimlib_extract_paths(), and
30  * wimlib_extract_pathlist().  Internally, all end up calling
31  * do_wimlib_extract_paths() and extract_trees().
32  *
33  * Although wimlib supports multiple extraction modes/backends (NTFS-3g, UNIX,
34  * Win32), this file does not itself have code to extract files or directories
35  * to any specific target; instead, it handles generic functionality and relies
36  * on lower-level callback functions declared in `struct apply_operations' to do
37  * the actual extraction.
38  */
39
40 #ifdef HAVE_CONFIG_H
41 #  include "config.h"
42 #endif
43
44 #include "wimlib/apply.h"
45 #include "wimlib/dentry.h"
46 #include "wimlib/encoding.h"
47 #include "wimlib/endianness.h"
48 #include "wimlib/error.h"
49 #include "wimlib/lookup_table.h"
50 #include "wimlib/metadata.h"
51 #include "wimlib/pathlist.h"
52 #include "wimlib/paths.h"
53 #include "wimlib/reparse.h"
54 #include "wimlib/resource.h"
55 #include "wimlib/security.h"
56 #ifdef __WIN32__
57 #  include "wimlib/win32.h" /* for realpath() equivalent */
58 #endif
59 #include "wimlib/xml.h"
60 #include "wimlib/wildcard.h"
61 #include "wimlib/wim.h"
62
63 #include <errno.h>
64 #include <fcntl.h>
65 #include <stdlib.h>
66 #include <sys/stat.h>
67 #include <unistd.h>
68
69 #define WIMLIB_EXTRACT_FLAG_MULTI_IMAGE 0x80000000
70 #define WIMLIB_EXTRACT_FLAG_FROM_PIPE   0x40000000
71 #define WIMLIB_EXTRACT_FLAG_IMAGEMODE   0x20000000
72
73 /* Keep in sync with wimlib.h  */
74 #define WIMLIB_EXTRACT_MASK_PUBLIC                              \
75         (WIMLIB_EXTRACT_FLAG_NTFS                       |       \
76          WIMLIB_EXTRACT_FLAG_NO_ACLS                    |       \
77          WIMLIB_EXTRACT_FLAG_STRICT_ACLS                |       \
78          WIMLIB_EXTRACT_FLAG_RPFIX                      |       \
79          WIMLIB_EXTRACT_FLAG_NORPFIX                    |       \
80          WIMLIB_EXTRACT_FLAG_TO_STDOUT                  |       \
81          WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES  |       \
82          WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS         |       \
83          WIMLIB_EXTRACT_FLAG_STRICT_TIMESTAMPS          |       \
84          WIMLIB_EXTRACT_FLAG_STRICT_SHORT_NAMES         |       \
85          WIMLIB_EXTRACT_FLAG_STRICT_SYMLINKS            |       \
86          WIMLIB_EXTRACT_FLAG_GLOB_PATHS                 |       \
87          WIMLIB_EXTRACT_FLAG_STRICT_GLOB                |       \
88          WIMLIB_EXTRACT_FLAG_NO_ATTRIBUTES              |       \
89          WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE  |       \
90          WIMLIB_EXTRACT_FLAG_WIMBOOT)
91
92 /* Check whether the extraction of a dentry should be skipped completely.  */
93 static bool
94 dentry_is_supported(struct wim_dentry *dentry,
95                     const struct wim_features *supported_features)
96 {
97         struct wim_inode *inode = dentry->d_inode;
98
99         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
100                 return supported_features->reparse_points ||
101                         (inode_is_symlink(inode) &&
102                          supported_features->symlink_reparse_points);
103         }
104         if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) {
105                 if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)
106                         return supported_features->encrypted_directories != 0;
107                 else
108                         return supported_features->encrypted_files != 0;
109         }
110         return true;
111 }
112
113
114 #define PWM_ALLOW_WIM_HDR 0x00001
115 #define PWM_SILENT_EOF    0x00002
116
117 /* Read the header from a stream in a pipable WIM.  */
118 static int
119 read_pwm_stream_header(WIMStruct *pwm, struct wim_lookup_table_entry *lte,
120                        struct wim_resource_spec *rspec,
121                        int flags, struct wim_header_disk *hdr_ret)
122 {
123         union {
124                 struct pwm_stream_hdr stream_hdr;
125                 struct wim_header_disk pwm_hdr;
126         } buf;
127         struct wim_reshdr reshdr;
128         int ret;
129
130         ret = full_read(&pwm->in_fd, &buf.stream_hdr, sizeof(buf.stream_hdr));
131         if (ret)
132                 goto read_error;
133
134         if ((flags & PWM_ALLOW_WIM_HDR) && buf.stream_hdr.magic == PWM_MAGIC) {
135                 BUILD_BUG_ON(sizeof(buf.pwm_hdr) < sizeof(buf.stream_hdr));
136                 ret = full_read(&pwm->in_fd, &buf.stream_hdr + 1,
137                                 sizeof(buf.pwm_hdr) - sizeof(buf.stream_hdr));
138
139                 if (ret)
140                         goto read_error;
141                 lte->resource_location = RESOURCE_NONEXISTENT;
142                 memcpy(hdr_ret, &buf.pwm_hdr, sizeof(buf.pwm_hdr));
143                 return 0;
144         }
145
146         if (le64_to_cpu(buf.stream_hdr.magic) != PWM_STREAM_MAGIC) {
147                 ERROR("Data read on pipe is invalid (expected stream header).");
148                 return WIMLIB_ERR_INVALID_PIPABLE_WIM;
149         }
150
151         copy_hash(lte->hash, buf.stream_hdr.hash);
152
153         reshdr.size_in_wim = 0;
154         reshdr.flags = le32_to_cpu(buf.stream_hdr.flags);
155         reshdr.offset_in_wim = pwm->in_fd.offset;
156         reshdr.uncompressed_size = le64_to_cpu(buf.stream_hdr.uncompressed_size);
157         wim_res_hdr_to_spec(&reshdr, pwm, rspec);
158         lte_bind_wim_resource_spec(lte, rspec);
159         lte->flags = rspec->flags;
160         lte->size = rspec->uncompressed_size;
161         lte->offset_in_res = 0;
162         return 0;
163
164 read_error:
165         if (ret != WIMLIB_ERR_UNEXPECTED_END_OF_FILE || !(flags & PWM_SILENT_EOF))
166                 ERROR_WITH_ERRNO("Error reading pipable WIM from pipe");
167         return ret;
168 }
169
170 static int
171 load_streams_from_pipe(struct apply_ctx *ctx,
172                        const struct read_stream_list_callbacks *cbs)
173 {
174         struct wim_lookup_table_entry *found_lte = NULL;
175         struct wim_resource_spec *rspec = NULL;
176         struct wim_lookup_table *lookup_table;
177         int ret;
178
179         ret = WIMLIB_ERR_NOMEM;
180         found_lte = new_lookup_table_entry();
181         if (!found_lte)
182                 goto out;
183
184         rspec = MALLOC(sizeof(struct wim_resource_spec));
185         if (!rspec)
186                 goto out;
187
188         lookup_table = ctx->wim->lookup_table;
189         memcpy(ctx->progress.extract.guid, ctx->wim->hdr.guid, WIM_GUID_LEN);
190         ctx->progress.extract.part_number = ctx->wim->hdr.part_number;
191         ctx->progress.extract.total_parts = ctx->wim->hdr.total_parts;
192         if (ctx->progress_func) {
193                 ctx->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_SPWM_PART_BEGIN,
194                                    &ctx->progress);
195         }
196         while (ctx->num_streams_remaining) {
197                 struct wim_header_disk pwm_hdr;
198                 struct wim_lookup_table_entry *needed_lte;
199
200                 if (found_lte->resource_location != RESOURCE_NONEXISTENT)
201                         lte_unbind_wim_resource_spec(found_lte);
202                 ret = read_pwm_stream_header(ctx->wim, found_lte, rspec,
203                                              PWM_ALLOW_WIM_HDR, &pwm_hdr);
204                 if (ret)
205                         goto out;
206
207                 if ((found_lte->resource_location != RESOURCE_NONEXISTENT)
208                     && !(found_lte->flags & WIM_RESHDR_FLAG_METADATA)
209                     && (needed_lte = lookup_stream(lookup_table, found_lte->hash))
210                     && (needed_lte->out_refcnt))
211                 {
212                         needed_lte->offset_in_res = found_lte->offset_in_res;
213                         needed_lte->flags = found_lte->flags;
214                         needed_lte->size = found_lte->size;
215
216                         lte_unbind_wim_resource_spec(found_lte);
217                         lte_bind_wim_resource_spec(needed_lte, rspec);
218
219                         ret = (*cbs->begin_stream)(needed_lte, 0,
220                                                    cbs->begin_stream_ctx);
221                         if (ret) {
222                                 lte_unbind_wim_resource_spec(needed_lte);
223                                 goto out;
224                         }
225
226                         ret = extract_stream(needed_lte, needed_lte->size,
227                                              cbs->consume_chunk,
228                                              cbs->consume_chunk_ctx);
229
230                         ret = (*cbs->end_stream)(needed_lte, ret,
231                                                  cbs->end_stream_ctx);
232                         lte_unbind_wim_resource_spec(needed_lte);
233                         if (ret)
234                                 goto out;
235                         ctx->num_streams_remaining--;
236                 } else if (found_lte->resource_location != RESOURCE_NONEXISTENT) {
237                         ret = skip_wim_stream(found_lte);
238                         if (ret)
239                                 goto out;
240                 } else {
241                         u16 part_number = le16_to_cpu(pwm_hdr.part_number);
242                         u16 total_parts = le16_to_cpu(pwm_hdr.total_parts);
243
244                         if (part_number != ctx->progress.extract.part_number ||
245                             total_parts != ctx->progress.extract.total_parts ||
246                             memcmp(pwm_hdr.guid, ctx->progress.extract.guid,
247                                    WIM_GUID_LEN))
248                         {
249                                 ctx->progress.extract.part_number = part_number;
250                                 ctx->progress.extract.total_parts = total_parts;
251                                 memcpy(ctx->progress.extract.guid,
252                                        pwm_hdr.guid, WIM_GUID_LEN);
253                                 if (ctx->progress_func) {
254                                         ctx->progress_func(
255                                                 WIMLIB_PROGRESS_MSG_EXTRACT_SPWM_PART_BEGIN,
256                                                            &ctx->progress);
257                                 }
258                         }
259                 }
260         }
261         ret = 0;
262 out:
263         if (found_lte->resource_location != RESOURCE_IN_WIM)
264                 FREE(rspec);
265         free_lookup_table_entry(found_lte);
266         return ret;
267 }
268
269 static int
270 begin_extract_stream_with_progress(struct wim_lookup_table_entry *lte,
271                                    u32 flags, void *_ctx)
272 {
273         struct apply_ctx *ctx = _ctx;
274
275         ctx->cur_stream = lte;
276
277         return (*ctx->saved_cbs->begin_stream)(lte, flags,
278                                                ctx->saved_cbs->begin_stream_ctx);
279 }
280
281 static int
282 consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx)
283 {
284         struct apply_ctx *ctx = _ctx;
285         wimlib_progress_func_t progress_func = ctx->progress_func;
286         union wimlib_progress_info *progress = &ctx->progress;
287         u32 num_copies = ctx->cur_stream->out_refcnt;
288
289         progress->extract.completed_bytes += size * num_copies;
290         if (progress->extract.completed_bytes >= ctx->next_progress) {
291                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS, progress);
292                 if (progress->extract.completed_bytes >=
293                     progress->extract.total_bytes)
294                 {
295                         ctx->next_progress = UINT64_MAX;
296                 } else {
297                         ctx->next_progress += progress->extract.total_bytes / 128;
298                         if (ctx->next_progress > progress->extract.total_bytes)
299                                 ctx->next_progress = progress->extract.total_bytes;
300                 }
301         }
302         return (*ctx->saved_cbs->consume_chunk)(chunk, size,
303                                                 ctx->saved_cbs->consume_chunk_ctx);
304 }
305
306 /*
307  * Read the list of single-instance streams to extract and feed their data into
308  * the specified callback functions.
309  *
310  * This handles checksumming each stream.
311  *
312  * This also handles sending WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS.
313  *
314  * This also works if the WIM is being read from a pipe, whereas attempting to
315  * read streams directly (e.g. with read_full_stream_into_buf()) will not.
316  */
317 int
318 extract_stream_list(struct apply_ctx *ctx,
319                     const struct read_stream_list_callbacks *cbs)
320 {
321         struct read_stream_list_callbacks wrapper_cbs = {
322                 .begin_stream      = begin_extract_stream_with_progress,
323                 .begin_stream_ctx  = ctx,
324                 .consume_chunk     = consume_chunk_with_progress,
325                 .consume_chunk_ctx = ctx,
326                 .end_stream        = cbs->end_stream,
327                 .end_stream_ctx    = cbs->end_stream_ctx,
328         };
329         if (ctx->progress_func) {
330                 ctx->saved_cbs = cbs;
331                 cbs = &wrapper_cbs;
332         }
333         if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
334                 return load_streams_from_pipe(ctx, cbs);
335         } else {
336                 return read_stream_list(&ctx->stream_list,
337                                         offsetof(struct wim_lookup_table_entry,
338                                                  extraction_list),
339                                         cbs, VERIFY_STREAM_HASHES);
340         }
341 }
342
343 /* Extract a WIM dentry to standard output.
344  *
345  * This obviously doesn't make sense in all cases.  We return an error if the
346  * dentry does not correspond to a regular file.  Otherwise we extract the
347  * unnamed data stream only.  */
348 static int
349 extract_dentry_to_stdout(struct wim_dentry *dentry,
350                          const struct wim_lookup_table *lookup_table)
351 {
352         struct wim_inode *inode = dentry->d_inode;
353         struct wim_lookup_table_entry *lte;
354         struct filedes _stdout;
355
356         if (inode->i_attributes & (FILE_ATTRIBUTE_REPARSE_POINT |
357                                    FILE_ATTRIBUTE_DIRECTORY))
358         {
359                 ERROR("\"%"TS"\" is not a regular file and therefore cannot be "
360                       "extracted to standard output", dentry_full_path(dentry));
361                 return WIMLIB_ERR_NOT_A_REGULAR_FILE;
362         }
363
364         lte = inode_unnamed_lte(inode, lookup_table);
365         if (!lte) {
366                 const u8 *hash = inode_unnamed_stream_hash(inode);
367                 if (!is_zero_hash(hash))
368                         return stream_not_found_error(inode, hash);
369                 return 0;
370         }
371
372         filedes_init(&_stdout, STDOUT_FILENO);
373         return extract_full_stream_to_fd(lte, &_stdout);
374 }
375
/* Extract each dentry in the array to standard output, in order, stopping at
 * and returning the first nonzero status.  Returns 0 if all succeed.  */
static int
extract_dentries_to_stdout(struct wim_dentry **dentries, size_t num_dentries,
                           const struct wim_lookup_table *lookup_table)
{
        int ret = 0;
        size_t idx = 0;

        while (ret == 0 && idx < num_dentries)
                ret = extract_dentry_to_stdout(dentries[idx++], lookup_table);
        return ret;
}
387
388 /**********************************************************************/
389
390 /*
391  * Removes duplicate dentries from the array.
392  *
393  * Returns the new number of dentries, packed at the front of the array.
394  */
395 static size_t
396 remove_duplicate_trees(struct wim_dentry **trees, size_t num_trees)
397 {
398         size_t i, j = 0;
399         for (i = 0; i < num_trees; i++) {
400                 if (!trees[i]->tmp_flag) {
401                         /* Found distinct dentry.  */
402                         trees[i]->tmp_flag = 1;
403                         trees[j++] = trees[i];
404                 }
405         }
406         for (i = 0; i < j; i++)
407                 trees[i]->tmp_flag = 0;
408         return j;
409 }
410
411 /*
412  * Remove dentries that are descendants of other dentries in the array.
413  *
414  * Returns the new number of dentries, packed at the front of the array.
415  */
416 static size_t
417 remove_contained_trees(struct wim_dentry **trees, size_t num_trees)
418 {
419         size_t i, j = 0;
420         for (i = 0; i < num_trees; i++)
421                 trees[i]->tmp_flag = 1;
422         for (i = 0; i < num_trees; i++) {
423                 struct wim_dentry *d = trees[i];
424                 while (!dentry_is_root(d)) {
425                         d = d->parent;
426                         if (d->tmp_flag)
427                                 goto tree_contained;
428                 }
429                 trees[j++] = trees[i];
430                 continue;
431
432         tree_contained:
433                 trees[i]->tmp_flag = 0;
434         }
435
436         for (i = 0; i < j; i++)
437                 trees[i]->tmp_flag = 0;
438         return j;
439 }
440
441 static int
442 dentry_append_to_list(struct wim_dentry *dentry, void *_dentry_list)
443 {
444         struct list_head *dentry_list = _dentry_list;
445         list_add_tail(&dentry->d_extraction_list_node, dentry_list);
446         return 0;
447 }
448
449 static void
450 dentry_reset_extraction_list_node(struct wim_dentry *dentry)
451 {
452         dentry->d_extraction_list_node = (struct list_head){NULL, NULL};
453 }
454
455 static int
456 dentry_delete_from_list(struct wim_dentry *dentry, void *_ignore)
457 {
458         list_del(&dentry->d_extraction_list_node);
459         dentry_reset_extraction_list_node(dentry);
460         return 0;
461 }
462
463 /*
464  * Build the preliminary list of dentries to be extracted.
465  *
466  * The list maintains the invariant that if d1 and d2 are in the list and d1 is
467  * an ancestor of d2, then d1 appears before d2 in the list.
468  */
469 static void
470 build_dentry_list(struct list_head *dentry_list, struct wim_dentry **trees,
471                   size_t num_trees, bool add_ancestors)
472 {
473         INIT_LIST_HEAD(dentry_list);
474
475         /* Add the trees recursively.  */
476         for (size_t i = 0; i < num_trees; i++)
477                 for_dentry_in_tree(trees[i], dentry_append_to_list, dentry_list);
478
479         /* If requested, add ancestors of the trees.  */
480         if (add_ancestors) {
481                 for (size_t i = 0; i < num_trees; i++) {
482                         struct wim_dentry *dentry = trees[i];
483                         struct wim_dentry *ancestor;
484                         struct list_head *place_after;
485
486                         if (dentry_is_root(dentry))
487                                 continue;
488
489                         place_after = dentry_list;
490                         ancestor = dentry;
491                         do {
492                                 ancestor = ancestor->parent;
493                                 if (will_extract_dentry(ancestor)) {
494                                         place_after = &ancestor->d_extraction_list_node;
495                                         break;
496                                 }
497                         } while (!dentry_is_root(ancestor));
498
499                         ancestor = dentry;
500                         do {
501                                 ancestor = ancestor->parent;
502                                 if (will_extract_dentry(ancestor))
503                                         break;
504                                 list_add(&ancestor->d_extraction_list_node, place_after);
505                         } while (!dentry_is_root(ancestor));
506                 }
507         }
508 }
509
510 static void
511 destroy_dentry_list(struct list_head *dentry_list)
512 {
513         struct wim_dentry *dentry, *tmp;
514         struct wim_inode *inode;
515
516         list_for_each_entry_safe(dentry, tmp, dentry_list, d_extraction_list_node) {
517                 inode = dentry->d_inode;
518                 dentry_reset_extraction_list_node(dentry);
519                 inode->i_visited = 0;
520                 if ((void *)dentry->d_extraction_name != (void *)dentry->file_name)
521                         FREE(dentry->d_extraction_name);
522                 dentry->d_extraction_name = NULL;
523                 dentry->d_extraction_name_nchars = 0;
524         }
525 }
526
527 static void
528 destroy_stream_list(struct list_head *stream_list)
529 {
530         struct wim_lookup_table_entry *lte;
531
532         list_for_each_entry(lte, stream_list, extraction_list)
533                 if (lte->out_refcnt > ARRAY_LEN(lte->inline_stream_owners))
534                         FREE(lte->stream_owners);
535 }
536
537 #ifdef __WIN32__
538 static const utf16lechar replacement_char = cpu_to_le16(0xfffd);
539 #else
540 static const utf16lechar replacement_char = cpu_to_le16('?');
541 #endif
542
543 static bool
544 file_name_valid(utf16lechar *name, size_t num_chars, bool fix)
545 {
546         size_t i;
547
548         if (num_chars == 0)
549                 return true;
550         for (i = 0; i < num_chars; i++) {
551                 switch (name[i]) {
552         #ifdef __WIN32__
553                 case cpu_to_le16('\\'):
554                 case cpu_to_le16(':'):
555                 case cpu_to_le16('*'):
556                 case cpu_to_le16('?'):
557                 case cpu_to_le16('"'):
558                 case cpu_to_le16('<'):
559                 case cpu_to_le16('>'):
560                 case cpu_to_le16('|'):
561         #endif
562                 case cpu_to_le16('/'):
563                 case cpu_to_le16('\0'):
564                         if (fix)
565                                 name[i] = replacement_char;
566                         else
567                                 return false;
568                 }
569         }
570
571 #ifdef __WIN32__
572         if (name[num_chars - 1] == cpu_to_le16(' ') ||
573             name[num_chars - 1] == cpu_to_le16('.'))
574         {
575                 if (fix)
576                         name[num_chars - 1] = replacement_char;
577                 else
578                         return false;
579         }
580 #endif
581         return true;
582 }
583
584 static int
585 dentry_calculate_extraction_name(struct wim_dentry *dentry,
586                                  struct apply_ctx *ctx)
587 {
588         int ret;
589
590         if (!dentry_is_supported(dentry, &ctx->supported_features))
591                 goto skip_dentry;
592
593         if (dentry_is_root(dentry))
594                 return 0;
595
596         if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
597                 dentry->d_extraction_name = dentry->file_name;
598                 dentry->d_extraction_name_nchars = dentry->file_name_nbytes /
599                                                    sizeof(utf16lechar);
600                 return 0;
601         }
602
603         if (!ctx->supported_features.case_sensitive_filenames) {
604                 struct wim_dentry *other;
605                 list_for_each_entry(other, &dentry->d_ci_conflict_list,
606                                     d_ci_conflict_list)
607                 {
608                         if (will_extract_dentry(other)) {
609                                 if (ctx->extract_flags &
610                                     WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS) {
611                                         WARNING("\"%"TS"\" has the same "
612                                                 "case-insensitive name as "
613                                                 "\"%"TS"\"; extracting "
614                                                 "dummy name instead",
615                                                 dentry_full_path(dentry),
616                                                 dentry_full_path(other));
617                                         goto out_replace;
618                                 } else {
619                                         WARNING("Not extracting \"%"TS"\": "
620                                                 "has same case-insensitive "
621                                                 "name as \"%"TS"\"",
622                                                 dentry_full_path(dentry),
623                                                 dentry_full_path(other));
624                                         goto skip_dentry;
625                                 }
626                         }
627                 }
628         }
629
630         if (file_name_valid(dentry->file_name, dentry->file_name_nbytes / 2, false)) {
631                 ret = utf16le_get_tstr(dentry->file_name,
632                                        dentry->file_name_nbytes,
633                                        (const tchar **)&dentry->d_extraction_name,
634                                        &dentry->d_extraction_name_nchars);
635                 dentry->d_extraction_name_nchars /= sizeof(tchar);
636                 return ret;
637         } else {
638                 if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES)
639                 {
640                         WARNING("\"%"TS"\" has an invalid filename "
641                                 "that is not supported on this platform; "
642                                 "extracting dummy name instead",
643                                 dentry_full_path(dentry));
644                         goto out_replace;
645                 } else {
646                         WARNING("Not extracting \"%"TS"\": has an invalid filename "
647                                 "that is not supported on this platform",
648                                 dentry_full_path(dentry));
649                         goto skip_dentry;
650                 }
651         }
652
653 out_replace:
654         {
655                 utf16lechar utf16_name_copy[dentry->file_name_nbytes / 2];
656
657                 memcpy(utf16_name_copy, dentry->file_name, dentry->file_name_nbytes);
658                 file_name_valid(utf16_name_copy, dentry->file_name_nbytes / 2, true);
659
660                 const tchar *tchar_name;
661                 size_t tchar_nchars;
662
663                 ret = utf16le_get_tstr(utf16_name_copy,
664                                        dentry->file_name_nbytes,
665                                        &tchar_name, &tchar_nchars);
666                 if (ret)
667                         return ret;
668
669                 tchar_nchars /= sizeof(tchar);
670
671                 size_t fixed_name_num_chars = tchar_nchars;
672                 tchar fixed_name[tchar_nchars + 50];
673
674                 tmemcpy(fixed_name, tchar_name, tchar_nchars);
675                 fixed_name_num_chars += tsprintf(fixed_name + tchar_nchars,
676                                                  T(" (invalid filename #%lu)"),
677                                                  ++ctx->invalid_sequence);
678
679                 utf16le_put_tstr(tchar_name);
680
681                 dentry->d_extraction_name = memdup(fixed_name,
682                                                    2 * fixed_name_num_chars + 2);
683                 if (!dentry->d_extraction_name)
684                         return WIMLIB_ERR_NOMEM;
685                 dentry->d_extraction_name_nchars = fixed_name_num_chars;
686         }
687         return 0;
688
689 skip_dentry:
690         for_dentry_in_tree(dentry, dentry_delete_from_list, NULL);
691         return 0;
692 }
693
694 /*
695  * Calculate the actual filename component at which each WIM dentry will be
696  * extracted, with special handling for dentries that are unsupported by the
697  * extraction backend or have invalid names.
698  *
699  * ctx->supported_features must be filled in.
700  *
701  * Possible error codes: WIMLIB_ERR_NOMEM, WIMLIB_ERR_INVALID_UTF16_STRING
702  */
703 static int
704 dentry_list_calculate_extraction_names(struct list_head *dentry_list,
705                                        struct apply_ctx *ctx)
706 {
707         struct list_head *prev, *cur;
708
709         /* Can't use list_for_each_entry() because a call to
710          * dentry_calculate_extraction_name() may delete the current dentry and
711          * its children from the list.  */
712
713         prev = dentry_list;
714         for (;;) {
715                 struct wim_dentry *dentry;
716                 int ret;
717
718                 cur = prev->next;
719                 if (cur == dentry_list)
720                         break;
721
722                 dentry = list_entry(cur, struct wim_dentry, d_extraction_list_node);
723
724                 ret = dentry_calculate_extraction_name(dentry, ctx);
725                 if (ret)
726                         return ret;
727
728                 if (prev->next == cur)
729                         prev = cur;
730                 else
731                         ; /* Current dentry and its children (which follow in
732                              the list) were deleted.  prev stays the same.  */
733         }
734         return 0;
735 }
736
737 static int
738 dentry_resolve_streams(struct wim_dentry *dentry, int extract_flags,
739                        struct wim_lookup_table *lookup_table)
740 {
741         struct wim_inode *inode = dentry->d_inode;
742         struct wim_lookup_table_entry *lte;
743         int ret;
744         bool force = false;
745
746         /* Special case:  when extracting from a pipe, the WIM lookup table is
747          * initially empty, so "resolving" an inode's streams is initially not
748          * possible.  However, we still need to keep track of which streams,
749          * identified by SHA1 message digests, need to be extracted, so we
750          * "resolve" the inode's streams anyway by allocating new entries.  */
751         if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE)
752                 force = true;
753         ret = inode_resolve_streams(inode, lookup_table, force);
754         if (ret)
755                 return ret;
756         for (u32 i = 0; i <= inode->i_num_ads; i++) {
757                 lte = inode_stream_lte_resolved(inode, i);
758                 if (lte)
759                         lte->out_refcnt = 0;
760         }
761         return 0;
762 }
763
764 /*
765  * For each dentry to be extracted, resolve all streams in the corresponding
766  * inode and set 'out_refcnt' in each to 0.
767  *
768  * Possible error codes: WIMLIB_ERR_RESOURCE_NOT_FOUND, WIMLIB_ERR_NOMEM.
769  */
770 static int
771 dentry_list_resolve_streams(struct list_head *dentry_list,
772                             struct apply_ctx *ctx)
773 {
774         struct wim_dentry *dentry;
775         int ret;
776
777         list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
778                 ret = dentry_resolve_streams(dentry,
779                                              ctx->extract_flags,
780                                              ctx->wim->lookup_table);
781                 if (ret)
782                         return ret;
783         }
784         return 0;
785 }
786
787 static int
788 ref_stream(struct wim_lookup_table_entry *lte, u32 stream_idx,
789            struct wim_dentry *dentry, struct apply_ctx *ctx)
790 {
791         struct wim_inode *inode = dentry->d_inode;
792         struct stream_owner *stream_owners;
793
794         if (!lte)
795                 return 0;
796
797         /* Tally the size only for each extraction of the stream (not hard
798          * links).  */
799         if (inode->i_visited && ctx->supported_features.hard_links)
800                 return 0;
801
802         ctx->progress.extract.total_bytes += lte->size;
803         ctx->progress.extract.num_streams++;
804
805         if (inode->i_visited)
806                 return 0;
807
808         /* Add stream to the dentry_list only one time, even if it's going
809          * to be extracted to multiple inodes.  */
810         if (lte->out_refcnt == 0) {
811                 list_add_tail(&lte->extraction_list, &ctx->stream_list);
812                 ctx->num_streams_remaining++;
813         }
814
815         /* If inode not yet been visited, append it to the stream_owners array.  */
816         if (lte->out_refcnt < ARRAY_LEN(lte->inline_stream_owners)) {
817                 stream_owners = lte->inline_stream_owners;
818         } else {
819                 struct stream_owner *prev_stream_owners;
820                 size_t alloc_stream_owners;
821
822                 if (lte->out_refcnt == ARRAY_LEN(lte->inline_stream_owners)) {
823                         prev_stream_owners = NULL;
824                         alloc_stream_owners = ARRAY_LEN(lte->inline_stream_owners);
825                 } else {
826                         prev_stream_owners = lte->stream_owners;
827                         alloc_stream_owners = lte->alloc_stream_owners;
828                 }
829
830                 if (lte->out_refcnt == alloc_stream_owners) {
831                         alloc_stream_owners *= 2;
832                         stream_owners = REALLOC(prev_stream_owners,
833                                                alloc_stream_owners *
834                                                 sizeof(stream_owners[0]));
835                         if (!stream_owners)
836                                 return WIMLIB_ERR_NOMEM;
837                         if (!prev_stream_owners) {
838                                 memcpy(stream_owners,
839                                        lte->inline_stream_owners,
840                                        sizeof(lte->inline_stream_owners));
841                         }
842                         lte->stream_owners = stream_owners;
843                         lte->alloc_stream_owners = alloc_stream_owners;
844                 }
845                 stream_owners = lte->stream_owners;
846         }
847         stream_owners[lte->out_refcnt].inode = inode;
848         if (stream_idx == 0) {
849                 stream_owners[lte->out_refcnt].stream_name = NULL;
850         } else {
851                 stream_owners[lte->out_refcnt].stream_name =
852                         inode->i_ads_entries[stream_idx - 1].stream_name;
853         }
854         lte->out_refcnt++;
855         return 0;
856 }
857
858 static int
859 dentry_ref_streams(struct wim_dentry *dentry, struct apply_ctx *ctx)
860 {
861         struct wim_inode *inode = dentry->d_inode;
862         int ret;
863
864         /* The unnamed data stream will always be extracted, except in an
865          * unlikely case.  */
866         if (!inode_is_encrypted_directory(inode)) {
867                 u16 stream_idx;
868                 struct wim_lookup_table_entry *stream;
869
870                 stream = inode_unnamed_stream_resolved(inode, &stream_idx);
871                 ret = ref_stream(stream, stream_idx, dentry, ctx);
872                 if (ret)
873                         return ret;
874         }
875
876         /* Named data streams will be extracted only if supported in the current
877          * extraction mode and volume, and to avoid complications, if not doing
878          * a linked extraction.  */
879         if (ctx->supported_features.named_data_streams) {
880                 for (u16 i = 0; i < inode->i_num_ads; i++) {
881                         if (!ads_entry_is_named_stream(&inode->i_ads_entries[i]))
882                                 continue;
883                         ret = ref_stream(inode->i_ads_entries[i].lte, i + 1,
884                                          dentry, ctx);
885                         if (ret)
886                                 return ret;
887                 }
888         }
889         inode->i_visited = 1;
890         return 0;
891 }
892
893 /*
894  * For each dentry to be extracted, iterate through the data streams of the
895  * corresponding inode.  For each such stream that is not to be ignored due to
896  * the supported features or extraction flags, add it to the list of streams to
897  * be extracted (ctx->stream_list) if not already done so.
898  *
899  * Also builds a mapping from each stream to the inodes referencing it.
900  *
901  * This also initializes the extract progress info with byte and stream
902  * information.
903  *
904  * ctx->supported_features must be filled in.
905  *
906  * Possible error codes: WIMLIB_ERR_NOMEM.
907  */
908 static int
909 dentry_list_ref_streams(struct list_head *dentry_list, struct apply_ctx *ctx)
910 {
911         struct wim_dentry *dentry;
912         int ret;
913
914         list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
915                 ret = dentry_ref_streams(dentry, ctx);
916                 if (ret)
917                         return ret;
918         }
919         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
920                 dentry->d_inode->i_visited = 0;
921         return 0;
922 }
923
924 static void
925 dentry_list_build_inode_alias_lists(struct list_head *dentry_list)
926 {
927         struct wim_dentry *dentry;
928         struct wim_inode *inode;
929
930         list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
931                 inode = dentry->d_inode;
932                 if (!inode->i_visited)
933                         INIT_LIST_HEAD(&inode->i_extraction_aliases);
934                 list_add_tail(&dentry->d_extraction_alias_node,
935                               &inode->i_extraction_aliases);
936                 inode->i_visited = 1;
937         }
938         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
939                 dentry->d_inode->i_visited = 0;
940 }
941
942 static void
943 inode_tally_features(const struct wim_inode *inode,
944                      struct wim_features *features)
945 {
946         if (inode->i_attributes & FILE_ATTRIBUTE_ARCHIVE)
947                 features->archive_files++;
948         if (inode->i_attributes & FILE_ATTRIBUTE_HIDDEN)
949                 features->hidden_files++;
950         if (inode->i_attributes & FILE_ATTRIBUTE_SYSTEM)
951                 features->system_files++;
952         if (inode->i_attributes & FILE_ATTRIBUTE_COMPRESSED)
953                 features->compressed_files++;
954         if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) {
955                 if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)
956                         features->encrypted_directories++;
957                 else
958                         features->encrypted_files++;
959         }
960         if (inode->i_attributes & FILE_ATTRIBUTE_NOT_CONTENT_INDEXED)
961                 features->not_context_indexed_files++;
962         if (inode->i_attributes & FILE_ATTRIBUTE_SPARSE_FILE)
963                 features->sparse_files++;
964         if (inode_has_named_stream(inode))
965                 features->named_data_streams++;
966         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
967                 features->reparse_points++;
968                 if (inode_is_symlink(inode))
969                         features->symlink_reparse_points++;
970                 else
971                         features->other_reparse_points++;
972         }
973         if (inode->i_security_id != -1)
974                 features->security_descriptors++;
975         if (inode_has_unix_data(inode))
976                 features->unix_data++;
977 }
978
979 /* Tally features necessary to extract a dentry and the corresponding inode.  */
980 static void
981 dentry_tally_features(struct wim_dentry *dentry, struct wim_features *features)
982 {
983         struct wim_inode *inode = dentry->d_inode;
984
985         if (dentry_has_short_name(dentry))
986                 features->short_names++;
987
988         if (inode->i_visited) {
989                 features->hard_links++;
990         } else {
991                 inode_tally_features(inode, features);
992                 inode->i_visited = 1;
993         }
994 }
995
996 /* Tally the features necessary to extract the specified dentries.  */
997 static void
998 dentry_list_get_features(struct list_head *dentry_list,
999                          struct wim_features *features)
1000 {
1001         struct wim_dentry *dentry;
1002
1003         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
1004                 dentry_tally_features(dentry, features);
1005
1006         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
1007                 dentry->d_inode->i_visited = 0;
1008 }
1009
1010 static int
1011 do_feature_check(const struct wim_features *required_features,
1012                  const struct wim_features *supported_features,
1013                  int extract_flags)
1014 {
1015         /* File attributes.  */
1016         if (!(extract_flags & WIMLIB_EXTRACT_FLAG_NO_ATTRIBUTES)) {
1017                 /* Note: Don't bother the user about FILE_ATTRIBUTE_ARCHIVE.
1018                  * We're an archive program, so theoretically we can do what we
1019                  * want with it.  */
1020
1021                 if (required_features->hidden_files &&
1022                     !supported_features->hidden_files)
1023                         WARNING("Ignoring FILE_ATTRIBUTE_HIDDEN of %lu files",
1024                                 required_features->hidden_files);
1025
1026                 if (required_features->system_files &&
1027                     !supported_features->system_files)
1028                         WARNING("Ignoring FILE_ATTRIBUTE_SYSTEM of %lu files",
1029                                 required_features->system_files);
1030
1031                 if (required_features->compressed_files &&
1032                     !supported_features->compressed_files)
1033                         WARNING("Ignoring FILE_ATTRIBUTE_COMPRESSED of %lu files",
1034                                 required_features->compressed_files);
1035
1036                 if (required_features->not_context_indexed_files &&
1037                     !supported_features->not_context_indexed_files)
1038                         WARNING("Ignoring FILE_ATTRIBUTE_NOT_CONTENT_INDEXED of %lu files",
1039                                 required_features->not_context_indexed_files);
1040
1041                 if (required_features->sparse_files &&
1042                     !supported_features->sparse_files)
1043                         WARNING("Ignoring FILE_ATTRIBUTE_SPARSE_FILE of %lu files",
1044                                 required_features->sparse_files);
1045
1046                 if (required_features->encrypted_directories &&
1047                     !supported_features->encrypted_directories)
1048                         WARNING("Ignoring FILE_ATTRIBUTE_ENCRYPTED of %lu directories",
1049                                 required_features->encrypted_directories);
1050         }
1051
1052         /* Encrypted files.  */
1053         if (required_features->encrypted_files &&
1054             !supported_features->encrypted_files)
1055                 WARNING("Ignoring %lu encrypted files",
1056                         required_features->encrypted_files);
1057
1058         /* Named data streams.  */
1059         if (required_features->named_data_streams &&
1060             (!supported_features->named_data_streams))
1061                 WARNING("Ignoring named data streams of %lu files",
1062                         required_features->named_data_streams);
1063
1064         /* Hard links.  */
1065         if (required_features->hard_links && !supported_features->hard_links)
1066                 WARNING("Extracting %lu hard links as independent files",
1067                         required_features->hard_links);
1068
1069         /* Symbolic links and reparse points.  */
1070         if ((extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_SYMLINKS) &&
1071             required_features->symlink_reparse_points &&
1072             !supported_features->symlink_reparse_points &&
1073             !supported_features->reparse_points)
1074         {
1075                 ERROR("Extraction backend does not support symbolic links!");
1076                 return WIMLIB_ERR_UNSUPPORTED;
1077         }
1078         if (required_features->reparse_points &&
1079             !supported_features->reparse_points)
1080         {
1081                 if (supported_features->symlink_reparse_points) {
1082                         if (required_features->other_reparse_points) {
1083                                 WARNING("Ignoring %lu non-symlink/junction "
1084                                         "reparse point files",
1085                                         required_features->other_reparse_points);
1086                         }
1087                 } else {
1088                         WARNING("Ignoring %lu reparse point files",
1089                                 required_features->reparse_points);
1090                 }
1091         }
1092
1093         /* Security descriptors.  */
1094         if (((extract_flags & (WIMLIB_EXTRACT_FLAG_STRICT_ACLS |
1095                                WIMLIB_EXTRACT_FLAG_UNIX_DATA))
1096              == WIMLIB_EXTRACT_FLAG_STRICT_ACLS) &&
1097             required_features->security_descriptors &&
1098             !supported_features->security_descriptors)
1099         {
1100                 ERROR("Extraction backend does not support security descriptors!");
1101                 return WIMLIB_ERR_UNSUPPORTED;
1102         }
1103         if (!(extract_flags & WIMLIB_EXTRACT_FLAG_NO_ACLS) &&
1104             required_features->security_descriptors &&
1105             !supported_features->security_descriptors)
1106                 WARNING("Ignoring Windows NT security descriptors of %lu files",
1107                         required_features->security_descriptors);
1108
1109         /* UNIX data.  */
1110         if ((extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) &&
1111             required_features->unix_data && !supported_features->unix_data)
1112         {
1113                 ERROR("Extraction backend does not support UNIX data!");
1114                 return WIMLIB_ERR_UNSUPPORTED;
1115         }
1116
1117         /* DOS Names.  */
1118         if (required_features->short_names &&
1119             !supported_features->short_names)
1120         {
1121                 if (extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_SHORT_NAMES) {
1122                         ERROR("Extraction backend does not support DOS names!");
1123                         return WIMLIB_ERR_UNSUPPORTED;
1124                 }
1125                 WARNING("Ignoring DOS names of %lu files",
1126                         required_features->short_names);
1127         }
1128
1129         /* Timestamps.  */
1130         if ((extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_TIMESTAMPS) &&
1131             !supported_features->timestamps)
1132         {
1133                 ERROR("Extraction backend does not support timestamps!");
1134                 return WIMLIB_ERR_UNSUPPORTED;
1135         }
1136
1137         return 0;
1138 }
1139
1140 static const struct apply_operations *
1141 select_apply_operations(int extract_flags)
1142 {
1143 #ifdef WITH_NTFS_3G
1144         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS)
1145                 return &ntfs_3g_apply_ops;
1146 #endif
1147 #ifdef __WIN32__
1148         return &win32_apply_ops;
1149 #else
1150         return &unix_apply_ops;
1151 #endif
1152 }
1153
1154 static int
1155 extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees,
1156               const tchar *target, int extract_flags,
1157               wimlib_progress_func_t progress_func)
1158 {
1159         const struct apply_operations *ops;
1160         struct apply_ctx *ctx;
1161         int ret;
1162         LIST_HEAD(dentry_list);
1163
1164         if (extract_flags & WIMLIB_EXTRACT_FLAG_TO_STDOUT) {
1165                 ret = extract_dentries_to_stdout(trees, num_trees,
1166                                                  wim->lookup_table);
1167                 goto out;
1168         }
1169
1170         num_trees = remove_duplicate_trees(trees, num_trees);
1171         num_trees = remove_contained_trees(trees, num_trees);
1172
1173         ops = select_apply_operations(extract_flags);
1174
1175         if (num_trees > 1 && ops->single_tree_only) {
1176                 ERROR("Extracting multiple directory trees "
1177                       "at once is not supported in %s extraction mode!",
1178                       ops->name);
1179                 ret = WIMLIB_ERR_UNSUPPORTED;
1180                 goto out;
1181         }
1182
1183         ctx = CALLOC(1, ops->context_size);
1184         if (!ctx) {
1185                 ret = WIMLIB_ERR_NOMEM;
1186                 goto out;
1187         }
1188
1189         ctx->wim = wim;
1190         ctx->target = target;
1191         ctx->target_nchars = tstrlen(target);
1192         ctx->extract_flags = extract_flags;
1193         if (progress_func) {
1194                 ctx->progress_func = progress_func;
1195                 ctx->progress.extract.image = wim->current_image;
1196                 ctx->progress.extract.extract_flags = (extract_flags &
1197                                                        WIMLIB_EXTRACT_MASK_PUBLIC);
1198                 ctx->progress.extract.wimfile_name = wim->filename;
1199                 ctx->progress.extract.image_name = wimlib_get_image_name(wim,
1200                                                                          wim->current_image);
1201                 ctx->progress.extract.target = target;
1202         }
1203         INIT_LIST_HEAD(&ctx->stream_list);
1204
1205         ret = (*ops->get_supported_features)(target, &ctx->supported_features);
1206         if (ret)
1207                 goto out_cleanup;
1208
1209         build_dentry_list(&dentry_list, trees, num_trees,
1210                           !(extract_flags &
1211                             WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE));
1212
1213         dentry_list_get_features(&dentry_list, &ctx->required_features);
1214
1215         ret = do_feature_check(&ctx->required_features, &ctx->supported_features,
1216                                ctx->extract_flags);
1217         if (ret)
1218                 goto out_cleanup;
1219
1220         ret = dentry_list_calculate_extraction_names(&dentry_list, ctx);
1221         if (ret)
1222                 goto out_cleanup;
1223
1224         ret = dentry_list_resolve_streams(&dentry_list, ctx);
1225         if (ret)
1226                 goto out_cleanup;
1227
1228         ret = dentry_list_ref_streams(&dentry_list, ctx);
1229         if (ret)
1230                 goto out_cleanup;
1231
1232         dentry_list_build_inode_alias_lists(&dentry_list);
1233
1234         if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
1235                 /* When extracting from a pipe, the number of bytes of data to
1236                  * extract can't be determined in the normal way (examining the
1237                  * lookup table), since at this point all we have is a set of
1238                  * SHA1 message digests of streams that need to be extracted.
1239                  * However, we can get a reasonably accurate estimate by taking
1240                  * <TOTALBYTES> from the corresponding <IMAGE> in the WIM XML
1241                  * data.  This does assume that a full image is being extracted,
1242                  * but currently there is no API for doing otherwise.  (Also,
1243                  * subtract <HARDLINKBYTES> from this if hard links are
1244                  * supported by the extraction mode.)  */
1245                 ctx->progress.extract.total_bytes =
1246                         wim_info_get_image_total_bytes(wim->wim_info,
1247                                                        wim->current_image);
1248                 if (ctx->supported_features.hard_links) {
1249                         ctx->progress.extract.total_bytes -=
1250                                 wim_info_get_image_hard_link_bytes(wim->wim_info,
1251                                                                    wim->current_image);
1252                 }
1253         }
1254
1255         if (ctx->progress_func) {
1256                 int msg;
1257                 if (extract_flags & WIMLIB_EXTRACT_FLAG_IMAGEMODE)
1258                         msg = WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN;
1259                 else
1260                         msg = WIMLIB_PROGRESS_MSG_EXTRACT_TREE_BEGIN;
1261                 (*ctx->progress_func)(msg, &ctx->progress);
1262         }
1263
1264         ret = (*ops->extract)(&dentry_list, ctx);
1265         if (ret)
1266                 goto out_cleanup;
1267
1268         if (ctx->progress_func &&
1269             ctx->progress.extract.completed_bytes <
1270                 ctx->progress.extract.total_bytes)
1271         {
1272                 ctx->progress.extract.completed_bytes =
1273                         ctx->progress.extract.total_bytes;
1274                 (*ctx->progress_func)(WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS,
1275                                       &ctx->progress);
1276         }
1277
1278         if (ctx->progress_func) {
1279                 int msg;
1280                 if (extract_flags & WIMLIB_EXTRACT_FLAG_IMAGEMODE)
1281                         msg = WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END;
1282                 else
1283                         msg = WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END;
1284                 (*ctx->progress_func)(msg, &ctx->progress);
1285         }
1286         ret = 0;
1287 out_cleanup:
1288         destroy_stream_list(&ctx->stream_list);
1289         destroy_dentry_list(&dentry_list);
1290         FREE(ctx);
1291 out:
1292         return ret;
1293 }
1294
1295 static int
1296 mkdir_if_needed(const tchar *target)
1297 {
1298         struct stat stbuf;
1299         if (tstat(target, &stbuf)) {
1300                 if (errno == ENOENT) {
1301                         if (tmkdir(target, 0755)) {
1302                                 ERROR_WITH_ERRNO("Failed to create directory "
1303                                                  "\"%"TS"\"", target);
1304                                 return WIMLIB_ERR_MKDIR;
1305                         }
1306                 } else {
1307                         ERROR_WITH_ERRNO("Failed to stat \"%"TS"\"", target);
1308                         return WIMLIB_ERR_STAT;
1309                 }
1310         } else if (!S_ISDIR(stbuf.st_mode)) {
1311                 ERROR("\"%"TS"\" is not a directory", target);
1312                 return WIMLIB_ERR_NOTDIR;
1313         }
1314         return 0;
1315 }
1316
1317 /* Make sure the extraction flags make sense, and update them if needed.  */
1318 static int
1319 check_extract_flags(const WIMStruct *wim, int *extract_flags_p)
1320 {
1321         int extract_flags = *extract_flags_p;
1322
1323         /* Check for invalid flag combinations  */
1324
1325         if ((extract_flags &
1326              (WIMLIB_EXTRACT_FLAG_NO_ACLS |
1327               WIMLIB_EXTRACT_FLAG_STRICT_ACLS)) == (WIMLIB_EXTRACT_FLAG_NO_ACLS |
1328                                                     WIMLIB_EXTRACT_FLAG_STRICT_ACLS))
1329                 return WIMLIB_ERR_INVALID_PARAM;
1330
1331         if ((extract_flags &
1332              (WIMLIB_EXTRACT_FLAG_RPFIX |
1333               WIMLIB_EXTRACT_FLAG_NORPFIX)) == (WIMLIB_EXTRACT_FLAG_RPFIX |
1334                                                 WIMLIB_EXTRACT_FLAG_NORPFIX))
1335                 return WIMLIB_ERR_INVALID_PARAM;
1336
1337 #ifndef WITH_NTFS_3G
1338         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
1339                 ERROR("wimlib was compiled without support for NTFS-3g, so\n"
1340                       "        it cannot apply a WIM image directly to an NTFS volume.");
1341                 return WIMLIB_ERR_UNSUPPORTED;
1342         }
1343 #endif
1344
1345 #ifndef __WIN32__
1346         if (extract_flags & WIMLIB_EXTRACT_FLAG_WIMBOOT) {
1347                 ERROR("WIMBoot extraction is only supported on Windows!");
1348                 return WIMLIB_ERR_UNSUPPORTED;
1349         }
1350 #endif
1351
1352         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
1353                               WIMLIB_EXTRACT_FLAG_NORPFIX |
1354                               WIMLIB_EXTRACT_FLAG_IMAGEMODE)) ==
1355                                         WIMLIB_EXTRACT_FLAG_IMAGEMODE)
1356         {
1357                 /* For full-image extraction, do reparse point fixups by default
1358                  * if the WIM header says they are enabled.  */
1359                 if (wim->hdr.flags & WIM_HDR_FLAG_RP_FIX)
1360                         extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX;
1361         }
1362
1363         *extract_flags_p = extract_flags;
1364         return 0;
1365 }
1366
1367 static u32
1368 get_wildcard_flags(int extract_flags)
1369 {
1370         u32 wildcard_flags = 0;
1371
1372         if (extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_GLOB)
1373                 wildcard_flags |= WILDCARD_FLAG_ERROR_IF_NO_MATCH;
1374         else
1375                 wildcard_flags |= WILDCARD_FLAG_WARN_IF_NO_MATCH;
1376
1377         if (default_ignore_case)
1378                 wildcard_flags |= WILDCARD_FLAG_CASE_INSENSITIVE;
1379
1380         return wildcard_flags;
1381 }
1382
/* Growable array of dentry pointers, filled in by append_dentry_cb().  */
struct append_dentry_ctx {
        /* The array itself; NULL while nothing has been allocated.  */
        struct wim_dentry **dentries;

        /* Number of array slots currently in use.  */
        size_t num_dentries;

        /* Number of array slots currently allocated.  */
        size_t num_alloc_dentries;
};
1388
1389 static int
1390 append_dentry_cb(struct wim_dentry *dentry, void *_ctx)
1391 {
1392         struct append_dentry_ctx *ctx = _ctx;
1393
1394         if (ctx->num_dentries == ctx->num_alloc_dentries) {
1395                 struct wim_dentry **new_dentries;
1396                 size_t new_length;
1397
1398                 new_length = max(ctx->num_alloc_dentries + 8,
1399                                  ctx->num_alloc_dentries * 3 / 2);
1400                 new_dentries = REALLOC(ctx->dentries,
1401                                        new_length * sizeof(ctx->dentries[0]));
1402                 if (new_dentries == NULL)
1403                         return WIMLIB_ERR_NOMEM;
1404                 ctx->dentries = new_dentries;
1405                 ctx->num_alloc_dentries = new_length;
1406         }
1407         ctx->dentries[ctx->num_dentries++] = dentry;
1408         return 0;
1409 }
1410
1411 static int
1412 do_wimlib_extract_paths(WIMStruct *wim, int image, const tchar *target,
1413                         const tchar * const *paths, size_t num_paths,
1414                         int extract_flags, wimlib_progress_func_t progress_func)
1415 {
1416         int ret;
1417         struct wim_dentry **trees;
1418         size_t num_trees;
1419
1420         if (wim == NULL || target == NULL || target[0] == T('\0') ||
1421             (num_paths != 0 && paths == NULL))
1422                 return WIMLIB_ERR_INVALID_PARAM;
1423
1424         ret = check_extract_flags(wim, &extract_flags);
1425         if (ret)
1426                 return ret;
1427
1428         ret = select_wim_image(wim, image);
1429         if (ret)
1430                 return ret;
1431
1432         ret = wim_checksum_unhashed_streams(wim);
1433         if (ret)
1434                 return ret;
1435
1436         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_NTFS |
1437                               WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE)) ==
1438             (WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE))
1439         {
1440                 ret = mkdir_if_needed(target);
1441                 if (ret)
1442                         return ret;
1443         }
1444
1445         if (extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS) {
1446
1447                 struct append_dentry_ctx append_dentry_ctx = {
1448                         .dentries = NULL,
1449                         .num_dentries = 0,
1450                         .num_alloc_dentries = 0,
1451                 };
1452
1453                 u32 wildcard_flags = get_wildcard_flags(extract_flags);
1454
1455                 for (size_t i = 0; i < num_paths; i++) {
1456                         tchar *path = canonicalize_wim_path(paths[i]);
1457                         if (path == NULL) {
1458                                 ret = WIMLIB_ERR_NOMEM;
1459                                 trees = append_dentry_ctx.dentries;
1460                                 goto out_free_trees;
1461                         }
1462                         ret = expand_wildcard(wim, path,
1463                                               append_dentry_cb,
1464                                               &append_dentry_ctx,
1465                                               wildcard_flags);
1466                         FREE(path);
1467                         if (ret) {
1468                                 trees = append_dentry_ctx.dentries;
1469                                 goto out_free_trees;
1470                         }
1471                 }
1472                 trees = append_dentry_ctx.dentries;
1473                 num_trees = append_dentry_ctx.num_dentries;
1474         } else {
1475                 trees = MALLOC(num_paths * sizeof(trees[0]));
1476                 if (trees == NULL)
1477                         return WIMLIB_ERR_NOMEM;
1478
1479                 for (size_t i = 0; i < num_paths; i++) {
1480
1481                         tchar *path = canonicalize_wim_path(paths[i]);
1482                         if (path == NULL) {
1483                                 ret = WIMLIB_ERR_NOMEM;
1484                                 goto out_free_trees;
1485                         }
1486
1487                         trees[i] = get_dentry(wim, path,
1488                                               WIMLIB_CASE_PLATFORM_DEFAULT);
1489                         FREE(path);
1490                         if (trees[i] == NULL) {
1491                                   ERROR("Path \"%"TS"\" does not exist "
1492                                         "in WIM image %d",
1493                                         paths[i], wim->current_image);
1494                                   ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
1495                                   goto out_free_trees;
1496                         }
1497                 }
1498                 num_trees = num_paths;
1499         }
1500
1501         if (num_trees == 0) {
1502                 ret = 0;
1503                 goto out_free_trees;
1504         }
1505
1506         ret = extract_trees(wim, trees, num_trees,
1507                             target, extract_flags, progress_func);
1508 out_free_trees:
1509         FREE(trees);
1510         return ret;
1511 }
1512
1513 static int
1514 extract_single_image(WIMStruct *wim, int image,
1515                      const tchar *target, int extract_flags,
1516                      wimlib_progress_func_t progress_func)
1517 {
1518         const tchar *path = WIMLIB_WIM_ROOT_PATH;
1519         extract_flags |= WIMLIB_EXTRACT_FLAG_IMAGEMODE;
1520         return do_wimlib_extract_paths(wim, image, target, &path, 1,
1521                                        extract_flags, progress_func);
1522 }
1523
1524 static const tchar * const filename_forbidden_chars =
1525 T(
1526 #ifdef __WIN32__
1527 "<>:\"/\\|?*"
1528 #else
1529 "/"
1530 #endif
1531 );
1532
1533 /* This function checks if it is okay to use a WIM image's name as a directory
1534  * name.  */
1535 static bool
1536 image_name_ok_as_dir(const tchar *image_name)
1537 {
1538         return image_name && *image_name &&
1539                 !tstrpbrk(image_name, filename_forbidden_chars) &&
1540                 tstrcmp(image_name, T(".")) &&
1541                 tstrcmp(image_name, T(".."));
1542 }
1543
1544 /* Extracts all images from the WIM to the directory @target, with the images
1545  * placed in subdirectories named by their image names. */
1546 static int
1547 extract_all_images(WIMStruct *wim,
1548                    const tchar *target,
1549                    int extract_flags,
1550                    wimlib_progress_func_t progress_func)
1551 {
1552         size_t image_name_max_len = max(xml_get_max_image_name_len(wim), 20);
1553         size_t output_path_len = tstrlen(target);
1554         tchar buf[output_path_len + 1 + image_name_max_len + 1];
1555         int ret;
1556         int image;
1557         const tchar *image_name;
1558
1559         extract_flags |= WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
1560
1561         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
1562                 ERROR("Cannot extract multiple images in NTFS extraction mode.");
1563                 return WIMLIB_ERR_INVALID_PARAM;
1564         }
1565
1566         ret = mkdir_if_needed(target);
1567         if (ret)
1568                 return ret;
1569         tmemcpy(buf, target, output_path_len);
1570         buf[output_path_len] = OS_PREFERRED_PATH_SEPARATOR;
1571         for (image = 1; image <= wim->hdr.image_count; image++) {
1572                 image_name = wimlib_get_image_name(wim, image);
1573                 if (image_name_ok_as_dir(image_name)) {
1574                         tstrcpy(buf + output_path_len + 1, image_name);
1575                 } else {
1576                         /* Image name is empty or contains forbidden characters.
1577                          * Use image number instead. */
1578                         tsprintf(buf + output_path_len + 1, T("%d"), image);
1579                 }
1580                 ret = extract_single_image(wim, image, buf, extract_flags,
1581                                            progress_func);
1582                 if (ret)
1583                         return ret;
1584         }
1585         return 0;
1586 }
1587
1588 static int
1589 do_wimlib_extract_image(WIMStruct *wim,
1590                         int image,
1591                         const tchar *target,
1592                         int extract_flags,
1593                         wimlib_progress_func_t progress_func)
1594 {
1595         if (extract_flags & (WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE |
1596                              WIMLIB_EXTRACT_FLAG_TO_STDOUT |
1597                              WIMLIB_EXTRACT_FLAG_GLOB_PATHS))
1598                 return WIMLIB_ERR_INVALID_PARAM;
1599
1600         if (image == WIMLIB_ALL_IMAGES)
1601                 return extract_all_images(wim, target, extract_flags,
1602                                           progress_func);
1603         else
1604                 return extract_single_image(wim, image, target, extract_flags,
1605                                             progress_func);
1606 }
1607
1608
1609 /****************************************************************************
1610  *                          Extraction API                                  *
1611  ****************************************************************************/
1612
1613 WIMLIBAPI int
1614 wimlib_extract_paths(WIMStruct *wim, int image, const tchar *target,
1615                      const tchar * const *paths, size_t num_paths,
1616                      int extract_flags, wimlib_progress_func_t progress_func)
1617 {
1618         if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC)
1619                 return WIMLIB_ERR_INVALID_PARAM;
1620
1621         return do_wimlib_extract_paths(wim, image, target, paths, num_paths,
1622                                        extract_flags, progress_func);
1623 }
1624
1625 WIMLIBAPI int
1626 wimlib_extract_pathlist(WIMStruct *wim, int image, const tchar *target,
1627                         const tchar *path_list_file, int extract_flags,
1628                         wimlib_progress_func_t progress_func)
1629 {
1630         int ret;
1631         tchar **paths;
1632         size_t num_paths;
1633         void *mem;
1634
1635         ret = read_path_list_file(path_list_file, &paths, &num_paths, &mem);
1636         if (ret) {
1637                 ERROR("Failed to read path list file \"%"TS"\"",
1638                       path_list_file);
1639                 return ret;
1640         }
1641
1642         ret = wimlib_extract_paths(wim, image, target,
1643                                    (const tchar * const *)paths, num_paths,
1644                                    extract_flags, progress_func);
1645         FREE(paths);
1646         FREE(mem);
1647         return ret;
1648 }
1649
1650 WIMLIBAPI int
1651 wimlib_extract_image_from_pipe(int pipe_fd, const tchar *image_num_or_name,
1652                                const tchar *target, int extract_flags,
1653                                wimlib_progress_func_t progress_func)
1654 {
1655         int ret;
1656         WIMStruct *pwm;
1657         struct filedes *in_fd;
1658         int image;
1659         unsigned i;
1660
1661         if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC)
1662                 return WIMLIB_ERR_INVALID_PARAM;
1663
1664         /* Read the WIM header from the pipe and get a WIMStruct to represent
1665          * the pipable WIM.  Caveats:  Unlike getting a WIMStruct with
1666          * wimlib_open_wim(), getting a WIMStruct in this way will result in
1667          * an empty lookup table, no XML data read, and no filename set.  */
1668         ret = open_wim_as_WIMStruct(&pipe_fd,
1669                                     WIMLIB_OPEN_FLAG_FROM_PIPE,
1670                                     &pwm, progress_func);
1671         if (ret)
1672                 return ret;
1673
1674         /* Sanity check to make sure this is a pipable WIM.  */
1675         if (pwm->hdr.magic != PWM_MAGIC) {
1676                 ERROR("The WIM being read from file descriptor %d "
1677                       "is not pipable!", pipe_fd);
1678                 ret = WIMLIB_ERR_NOT_PIPABLE;
1679                 goto out_wimlib_free;
1680         }
1681
1682         /* Sanity check to make sure the first part of a pipable split WIM is
1683          * sent over the pipe first.  */
1684         if (pwm->hdr.part_number != 1) {
1685                 ERROR("The first part of the split WIM must be "
1686                       "sent over the pipe first.");
1687                 ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
1688                 goto out_wimlib_free;
1689         }
1690
1691         in_fd = &pwm->in_fd;
1692         wimlib_assert(in_fd->offset == WIM_HEADER_DISK_SIZE);
1693
1694         /* As mentioned, the WIMStruct we created from the pipe does not have
1695          * XML data yet.  Fix this by reading the extra copy of the XML data
1696          * that directly follows the header in pipable WIMs.  (Note: see
1697          * write_pipable_wim() for more details about the format of pipable
1698          * WIMs.)  */
1699         {
1700                 struct wim_lookup_table_entry xml_lte;
1701                 struct wim_resource_spec xml_rspec;
1702                 ret = read_pwm_stream_header(pwm, &xml_lte, &xml_rspec, 0, NULL);
1703                 if (ret)
1704                         goto out_wimlib_free;
1705
1706                 if (!(xml_lte.flags & WIM_RESHDR_FLAG_METADATA))
1707                 {
1708                         ERROR("Expected XML data, but found non-metadata "
1709                               "stream.");
1710                         ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
1711                         goto out_wimlib_free;
1712                 }
1713
1714                 wim_res_spec_to_hdr(&xml_rspec, &pwm->hdr.xml_data_reshdr);
1715
1716                 ret = read_wim_xml_data(pwm);
1717                 if (ret)
1718                         goto out_wimlib_free;
1719
1720                 if (wim_info_get_num_images(pwm->wim_info) != pwm->hdr.image_count) {
1721                         ERROR("Image count in XML data is not the same as in WIM header.");
1722                         ret = WIMLIB_ERR_IMAGE_COUNT;
1723                         goto out_wimlib_free;
1724                 }
1725         }
1726
1727         /* Get image index (this may use the XML data that was just read to
1728          * resolve an image name).  */
1729         if (image_num_or_name) {
1730                 image = wimlib_resolve_image(pwm, image_num_or_name);
1731                 if (image == WIMLIB_NO_IMAGE) {
1732                         ERROR("\"%"TS"\" is not a valid image in the pipable WIM!",
1733                               image_num_or_name);
1734                         ret = WIMLIB_ERR_INVALID_IMAGE;
1735                         goto out_wimlib_free;
1736                 } else if (image == WIMLIB_ALL_IMAGES) {
1737                         ERROR("Applying all images from a pipe is not supported!");
1738                         ret = WIMLIB_ERR_INVALID_IMAGE;
1739                         goto out_wimlib_free;
1740                 }
1741         } else {
1742                 if (pwm->hdr.image_count != 1) {
1743                         ERROR("No image was specified, but the pipable WIM "
1744                               "did not contain exactly 1 image");
1745                         ret = WIMLIB_ERR_INVALID_IMAGE;
1746                         goto out_wimlib_free;
1747                 }
1748                 image = 1;
1749         }
1750
1751         /* Load the needed metadata resource.  */
1752         for (i = 1; i <= pwm->hdr.image_count; i++) {
1753                 struct wim_lookup_table_entry *metadata_lte;
1754                 struct wim_image_metadata *imd;
1755                 struct wim_resource_spec *metadata_rspec;
1756
1757                 metadata_lte = new_lookup_table_entry();
1758                 if (metadata_lte == NULL) {
1759                         ret = WIMLIB_ERR_NOMEM;
1760                         goto out_wimlib_free;
1761                 }
1762                 metadata_rspec = MALLOC(sizeof(struct wim_resource_spec));
1763                 if (metadata_rspec == NULL) {
1764                         ret = WIMLIB_ERR_NOMEM;
1765                         free_lookup_table_entry(metadata_lte);
1766                         goto out_wimlib_free;
1767                 }
1768
1769                 ret = read_pwm_stream_header(pwm, metadata_lte, metadata_rspec, 0, NULL);
1770                 imd = pwm->image_metadata[i - 1];
1771                 imd->metadata_lte = metadata_lte;
1772                 if (ret) {
1773                         FREE(metadata_rspec);
1774                         goto out_wimlib_free;
1775                 }
1776
1777                 if (!(metadata_lte->flags & WIM_RESHDR_FLAG_METADATA)) {
1778                         ERROR("Expected metadata resource, but found "
1779                               "non-metadata stream.");
1780                         ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
1781                         goto out_wimlib_free;
1782                 }
1783
1784                 if (i == image) {
1785                         /* Metadata resource is for the image being extracted.
1786                          * Parse it and save the metadata in memory.  */
1787                         ret = read_metadata_resource(pwm, imd);
1788                         if (ret)
1789                                 goto out_wimlib_free;
1790                         imd->modified = 1;
1791                 } else {
1792                         /* Metadata resource is not for the image being
1793                          * extracted.  Skip over it.  */
1794                         ret = skip_wim_stream(metadata_lte);
1795                         if (ret)
1796                                 goto out_wimlib_free;
1797                 }
1798         }
1799         /* Extract the image.  */
1800         extract_flags |= WIMLIB_EXTRACT_FLAG_FROM_PIPE;
1801         ret = do_wimlib_extract_image(pwm, image, target,
1802                                       extract_flags, progress_func);
1803         /* Clean up and return.  */
1804 out_wimlib_free:
1805         wimlib_free(pwm);
1806         return ret;
1807 }
1808
1809 WIMLIBAPI int
1810 wimlib_extract_image(WIMStruct *wim, int image, const tchar *target,
1811                      int extract_flags, wimlib_progress_func_t progress_func)
1812 {
1813         if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC)
1814                 return WIMLIB_ERR_INVALID_PARAM;
1815         return do_wimlib_extract_image(wim, image, target, extract_flags,
1816                                        progress_func);
1817 }