]> wimlib.net Git - wimlib/blob - src/extract.c
05c3d16b2551ce13932ba190b36e2f481dbaf573
[wimlib] / src / extract.c
1 /*
2  * extract.c
3  *
4  * Support for extracting WIM images, or files or directories contained in a WIM
5  * image.
6  */
7
8 /*
9  * Copyright (C) 2012, 2013, 2014 Eric Biggers
10  *
11  * This file is part of wimlib, a library for working with WIM files.
12  *
13  * wimlib is free software; you can redistribute it and/or modify it under the
14  * terms of the GNU General Public License as published by the Free
15  * Software Foundation; either version 3 of the License, or (at your option)
16  * any later version.
17  *
18  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20  * A PARTICULAR PURPOSE. See the GNU General Public License for more
21  * details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with wimlib; if not, see http://www.gnu.org/licenses/.
25  */
26
27 /*
28  * This file provides the API functions wimlib_extract_image(),
29  * wimlib_extract_image_from_pipe(), wimlib_extract_paths(), and
30  * wimlib_extract_pathlist().  Internally, all end up calling
31  * do_wimlib_extract_paths() and extract_trees().
32  *
33  * Although wimlib supports multiple extraction modes/backends (NTFS-3g, UNIX,
34  * Win32), this file does not itself have code to extract files or directories
35  * to any specific target; instead, it handles generic functionality and relies
36  * on lower-level callback functions declared in `struct apply_operations' to do
37  * the actual extraction.
38  */
39
40 #ifdef HAVE_CONFIG_H
41 #  include "config.h"
42 #endif
43
44 #include "wimlib/apply.h"
45 #include "wimlib/dentry.h"
46 #include "wimlib/encoding.h"
47 #include "wimlib/endianness.h"
48 #include "wimlib/error.h"
49 #include "wimlib/lookup_table.h"
50 #include "wimlib/metadata.h"
51 #include "wimlib/pathlist.h"
52 #include "wimlib/paths.h"
53 #include "wimlib/reparse.h"
54 #include "wimlib/resource.h"
55 #include "wimlib/security.h"
56 #ifdef __WIN32__
57 #  include "wimlib/win32.h" /* for realpath() equivalent */
58 #endif
59 #include "wimlib/xml.h"
60 #include "wimlib/wildcard.h"
61 #include "wimlib/wim.h"
62
63 #include <errno.h>
64 #include <fcntl.h>
65 #include <stdlib.h>
66 #include <sys/stat.h>
67 #include <unistd.h>
68
/* Extraction flags defined privately in this file.  None of them is included
 * in WIMLIB_EXTRACT_MASK_PUBLIC below.  */
#define WIMLIB_EXTRACT_FLAG_MULTI_IMAGE 0x80000000
#define WIMLIB_EXTRACT_FLAG_FROM_PIPE   0x40000000
#define WIMLIB_EXTRACT_FLAG_IMAGEMODE   0x20000000

/* Bitwise OR of the extraction flags listed below.
 * NOTE(review): presumably this is the set of flags API callers are allowed
 * to pass -- confirm against the users of this mask.
 * Keep in sync with wimlib.h  */
#define WIMLIB_EXTRACT_MASK_PUBLIC                              \
        (WIMLIB_EXTRACT_FLAG_NTFS                       |       \
         WIMLIB_EXTRACT_FLAG_UNIX_DATA                  |       \
         WIMLIB_EXTRACT_FLAG_NO_ACLS                    |       \
         WIMLIB_EXTRACT_FLAG_STRICT_ACLS                |       \
         WIMLIB_EXTRACT_FLAG_RPFIX                      |       \
         WIMLIB_EXTRACT_FLAG_NORPFIX                    |       \
         WIMLIB_EXTRACT_FLAG_TO_STDOUT                  |       \
         WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES  |       \
         WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS         |       \
         WIMLIB_EXTRACT_FLAG_STRICT_TIMESTAMPS          |       \
         WIMLIB_EXTRACT_FLAG_STRICT_SHORT_NAMES         |       \
         WIMLIB_EXTRACT_FLAG_STRICT_SYMLINKS            |       \
         WIMLIB_EXTRACT_FLAG_GLOB_PATHS                 |       \
         WIMLIB_EXTRACT_FLAG_STRICT_GLOB                |       \
         WIMLIB_EXTRACT_FLAG_NO_ATTRIBUTES              |       \
         WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE  |       \
         WIMLIB_EXTRACT_FLAG_WIMBOOT)
92
93 /* Check whether the extraction of a dentry should be skipped completely.  */
94 static bool
95 dentry_is_supported(struct wim_dentry *dentry,
96                     const struct wim_features *supported_features)
97 {
98         struct wim_inode *inode = dentry->d_inode;
99
100         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
101                 return supported_features->reparse_points ||
102                         (inode_is_symlink(inode) &&
103                          supported_features->symlink_reparse_points);
104         }
105         if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) {
106                 if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)
107                         return supported_features->encrypted_directories != 0;
108                 else
109                         return supported_features->encrypted_files != 0;
110         }
111         return true;
112 }
113
114
115 #define PWM_ALLOW_WIM_HDR 0x00001
116 #define PWM_SILENT_EOF    0x00002
117
118 /* Read the header from a stream in a pipable WIM.  */
119 static int
120 read_pwm_stream_header(WIMStruct *pwm, struct wim_lookup_table_entry *lte,
121                        struct wim_resource_spec *rspec,
122                        int flags, struct wim_header_disk *hdr_ret)
123 {
124         union {
125                 struct pwm_stream_hdr stream_hdr;
126                 struct wim_header_disk pwm_hdr;
127         } buf;
128         struct wim_reshdr reshdr;
129         int ret;
130
131         ret = full_read(&pwm->in_fd, &buf.stream_hdr, sizeof(buf.stream_hdr));
132         if (ret)
133                 goto read_error;
134
135         if ((flags & PWM_ALLOW_WIM_HDR) && buf.stream_hdr.magic == PWM_MAGIC) {
136                 BUILD_BUG_ON(sizeof(buf.pwm_hdr) < sizeof(buf.stream_hdr));
137                 ret = full_read(&pwm->in_fd, &buf.stream_hdr + 1,
138                                 sizeof(buf.pwm_hdr) - sizeof(buf.stream_hdr));
139
140                 if (ret)
141                         goto read_error;
142                 lte->resource_location = RESOURCE_NONEXISTENT;
143                 memcpy(hdr_ret, &buf.pwm_hdr, sizeof(buf.pwm_hdr));
144                 return 0;
145         }
146
147         if (le64_to_cpu(buf.stream_hdr.magic) != PWM_STREAM_MAGIC) {
148                 ERROR("Data read on pipe is invalid (expected stream header).");
149                 return WIMLIB_ERR_INVALID_PIPABLE_WIM;
150         }
151
152         copy_hash(lte->hash, buf.stream_hdr.hash);
153
154         reshdr.size_in_wim = 0;
155         reshdr.flags = le32_to_cpu(buf.stream_hdr.flags);
156         reshdr.offset_in_wim = pwm->in_fd.offset;
157         reshdr.uncompressed_size = le64_to_cpu(buf.stream_hdr.uncompressed_size);
158         wim_res_hdr_to_spec(&reshdr, pwm, rspec);
159         lte_bind_wim_resource_spec(lte, rspec);
160         lte->flags = rspec->flags;
161         lte->size = rspec->uncompressed_size;
162         lte->offset_in_res = 0;
163         return 0;
164
165 read_error:
166         if (ret != WIMLIB_ERR_UNEXPECTED_END_OF_FILE || !(flags & PWM_SILENT_EOF))
167                 ERROR_WITH_ERRNO("Error reading pipable WIM from pipe");
168         return ret;
169 }
170
171 static int
172 load_streams_from_pipe(struct apply_ctx *ctx,
173                        const struct read_stream_list_callbacks *cbs)
174 {
175         struct wim_lookup_table_entry *found_lte = NULL;
176         struct wim_resource_spec *rspec = NULL;
177         struct wim_lookup_table *lookup_table;
178         int ret;
179
180         ret = WIMLIB_ERR_NOMEM;
181         found_lte = new_lookup_table_entry();
182         if (!found_lte)
183                 goto out;
184
185         rspec = MALLOC(sizeof(struct wim_resource_spec));
186         if (!rspec)
187                 goto out;
188
189         lookup_table = ctx->wim->lookup_table;
190         memcpy(ctx->progress.extract.guid, ctx->wim->hdr.guid, WIM_GUID_LEN);
191         ctx->progress.extract.part_number = ctx->wim->hdr.part_number;
192         ctx->progress.extract.total_parts = ctx->wim->hdr.total_parts;
193         if (ctx->progress_func) {
194                 ctx->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_SPWM_PART_BEGIN,
195                                    &ctx->progress);
196         }
197         while (ctx->num_streams_remaining) {
198                 struct wim_header_disk pwm_hdr;
199                 struct wim_lookup_table_entry *needed_lte;
200
201                 if (found_lte->resource_location != RESOURCE_NONEXISTENT)
202                         lte_unbind_wim_resource_spec(found_lte);
203                 ret = read_pwm_stream_header(ctx->wim, found_lte, rspec,
204                                              PWM_ALLOW_WIM_HDR, &pwm_hdr);
205                 if (ret)
206                         goto out;
207
208                 if ((found_lte->resource_location != RESOURCE_NONEXISTENT)
209                     && !(found_lte->flags & WIM_RESHDR_FLAG_METADATA)
210                     && (needed_lte = lookup_stream(lookup_table, found_lte->hash))
211                     && (needed_lte->out_refcnt))
212                 {
213                         needed_lte->offset_in_res = found_lte->offset_in_res;
214                         needed_lte->flags = found_lte->flags;
215                         needed_lte->size = found_lte->size;
216
217                         lte_unbind_wim_resource_spec(found_lte);
218                         lte_bind_wim_resource_spec(needed_lte, rspec);
219
220                         ret = (*cbs->begin_stream)(needed_lte, 0,
221                                                    cbs->begin_stream_ctx);
222                         if (ret) {
223                                 lte_unbind_wim_resource_spec(needed_lte);
224                                 goto out;
225                         }
226
227                         ret = extract_stream(needed_lte, needed_lte->size,
228                                              cbs->consume_chunk,
229                                              cbs->consume_chunk_ctx);
230
231                         ret = (*cbs->end_stream)(needed_lte, ret,
232                                                  cbs->end_stream_ctx);
233                         lte_unbind_wim_resource_spec(needed_lte);
234                         if (ret)
235                                 goto out;
236                         ctx->num_streams_remaining--;
237                 } else if (found_lte->resource_location != RESOURCE_NONEXISTENT) {
238                         ret = skip_wim_stream(found_lte);
239                         if (ret)
240                                 goto out;
241                 } else {
242                         u16 part_number = le16_to_cpu(pwm_hdr.part_number);
243                         u16 total_parts = le16_to_cpu(pwm_hdr.total_parts);
244
245                         if (part_number != ctx->progress.extract.part_number ||
246                             total_parts != ctx->progress.extract.total_parts ||
247                             memcmp(pwm_hdr.guid, ctx->progress.extract.guid,
248                                    WIM_GUID_LEN))
249                         {
250                                 ctx->progress.extract.part_number = part_number;
251                                 ctx->progress.extract.total_parts = total_parts;
252                                 memcpy(ctx->progress.extract.guid,
253                                        pwm_hdr.guid, WIM_GUID_LEN);
254                                 if (ctx->progress_func) {
255                                         ctx->progress_func(
256                                                 WIMLIB_PROGRESS_MSG_EXTRACT_SPWM_PART_BEGIN,
257                                                            &ctx->progress);
258                                 }
259                         }
260                 }
261         }
262         ret = 0;
263 out:
264         if (found_lte->resource_location != RESOURCE_IN_WIM)
265                 FREE(rspec);
266         free_lookup_table_entry(found_lte);
267         return ret;
268 }
269
270 static int
271 begin_extract_stream_with_progress(struct wim_lookup_table_entry *lte,
272                                    u32 flags, void *_ctx)
273 {
274         struct apply_ctx *ctx = _ctx;
275
276         ctx->cur_stream = lte;
277
278         return (*ctx->saved_cbs->begin_stream)(lte, flags,
279                                                ctx->saved_cbs->begin_stream_ctx);
280 }
281
282 static int
283 consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx)
284 {
285         struct apply_ctx *ctx = _ctx;
286         wimlib_progress_func_t progress_func = ctx->progress_func;
287         union wimlib_progress_info *progress = &ctx->progress;
288
289         if (likely(ctx->supported_features.hard_links)) {
290                 progress->extract.completed_bytes +=
291                         (u64)size * ctx->cur_stream->out_refcnt;
292         } else {
293                 const struct stream_owner *owners = stream_owners(ctx->cur_stream);
294                 for (u32 i = 0; i < ctx->cur_stream->out_refcnt; i++) {
295                         const struct wim_inode *inode = owners[i].inode;
296                         const struct wim_dentry *dentry;
297
298                         list_for_each_entry(dentry,
299                                             &inode->i_extraction_aliases,
300                                             d_extraction_alias_node)
301                         {
302                                 progress->extract.completed_bytes += size;
303                         }
304                 }
305         }
306         if (progress->extract.completed_bytes >= ctx->next_progress) {
307                 progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS, progress);
308                 if (progress->extract.completed_bytes >=
309                     progress->extract.total_bytes)
310                 {
311                         ctx->next_progress = UINT64_MAX;
312                 } else {
313                         ctx->next_progress += progress->extract.total_bytes / 128;
314                         if (ctx->next_progress > progress->extract.total_bytes)
315                                 ctx->next_progress = progress->extract.total_bytes;
316                 }
317         }
318         return (*ctx->saved_cbs->consume_chunk)(chunk, size,
319                                                 ctx->saved_cbs->consume_chunk_ctx);
320 }
321
322 /*
323  * Read the list of single-instance streams to extract and feed their data into
324  * the specified callback functions.
325  *
326  * This handles checksumming each stream.
327  *
328  * This also handles sending WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS.
329  *
330  * This also works if the WIM is being read from a pipe, whereas attempting to
331  * read streams directly (e.g. with read_full_stream_into_buf()) will not.
332  */
333 int
334 extract_stream_list(struct apply_ctx *ctx,
335                     const struct read_stream_list_callbacks *cbs)
336 {
337         struct read_stream_list_callbacks wrapper_cbs = {
338                 .begin_stream      = begin_extract_stream_with_progress,
339                 .begin_stream_ctx  = ctx,
340                 .consume_chunk     = consume_chunk_with_progress,
341                 .consume_chunk_ctx = ctx,
342                 .end_stream        = cbs->end_stream,
343                 .end_stream_ctx    = cbs->end_stream_ctx,
344         };
345         if (ctx->progress_func) {
346                 ctx->saved_cbs = cbs;
347                 cbs = &wrapper_cbs;
348         }
349         if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
350                 return load_streams_from_pipe(ctx, cbs);
351         } else {
352                 return read_stream_list(&ctx->stream_list,
353                                         offsetof(struct wim_lookup_table_entry,
354                                                  extraction_list),
355                                         cbs, VERIFY_STREAM_HASHES);
356         }
357 }
358
359 /* Extract a WIM dentry to standard output.
360  *
361  * This obviously doesn't make sense in all cases.  We return an error if the
362  * dentry does not correspond to a regular file.  Otherwise we extract the
363  * unnamed data stream only.  */
364 static int
365 extract_dentry_to_stdout(struct wim_dentry *dentry,
366                          const struct wim_lookup_table *lookup_table)
367 {
368         struct wim_inode *inode = dentry->d_inode;
369         struct wim_lookup_table_entry *lte;
370         struct filedes _stdout;
371
372         if (inode->i_attributes & (FILE_ATTRIBUTE_REPARSE_POINT |
373                                    FILE_ATTRIBUTE_DIRECTORY))
374         {
375                 ERROR("\"%"TS"\" is not a regular file and therefore cannot be "
376                       "extracted to standard output", dentry_full_path(dentry));
377                 return WIMLIB_ERR_NOT_A_REGULAR_FILE;
378         }
379
380         lte = inode_unnamed_lte(inode, lookup_table);
381         if (!lte) {
382                 const u8 *hash = inode_unnamed_stream_hash(inode);
383                 if (!is_zero_hash(hash))
384                         return stream_not_found_error(inode, hash);
385                 return 0;
386         }
387
388         filedes_init(&_stdout, STDOUT_FILENO);
389         return extract_full_stream_to_fd(lte, &_stdout);
390 }
391
/* Extract each of the given dentries to standard output, in array order,
 * stopping at (and returning) the first error.  Returns 0 if all succeed.  */
static int
extract_dentries_to_stdout(struct wim_dentry **dentries, size_t num_dentries,
                           const struct wim_lookup_table *lookup_table)
{
        struct wim_dentry **const end = dentries + num_dentries;
        int ret = 0;

        for (struct wim_dentry **p = dentries; p != end && ret == 0; p++)
                ret = extract_dentry_to_stdout(*p, lookup_table);
        return ret;
}
403
404 /**********************************************************************/
405
406 /*
407  * Removes duplicate dentries from the array.
408  *
409  * Returns the new number of dentries, packed at the front of the array.
410  */
411 static size_t
412 remove_duplicate_trees(struct wim_dentry **trees, size_t num_trees)
413 {
414         size_t i, j = 0;
415         for (i = 0; i < num_trees; i++) {
416                 if (!trees[i]->tmp_flag) {
417                         /* Found distinct dentry.  */
418                         trees[i]->tmp_flag = 1;
419                         trees[j++] = trees[i];
420                 }
421         }
422         for (i = 0; i < j; i++)
423                 trees[i]->tmp_flag = 0;
424         return j;
425 }
426
427 /*
428  * Remove dentries that are descendants of other dentries in the array.
429  *
430  * Returns the new number of dentries, packed at the front of the array.
431  */
432 static size_t
433 remove_contained_trees(struct wim_dentry **trees, size_t num_trees)
434 {
435         size_t i, j = 0;
436         for (i = 0; i < num_trees; i++)
437                 trees[i]->tmp_flag = 1;
438         for (i = 0; i < num_trees; i++) {
439                 struct wim_dentry *d = trees[i];
440                 while (!dentry_is_root(d)) {
441                         d = d->parent;
442                         if (d->tmp_flag)
443                                 goto tree_contained;
444                 }
445                 trees[j++] = trees[i];
446                 continue;
447
448         tree_contained:
449                 trees[i]->tmp_flag = 0;
450         }
451
452         for (i = 0; i < j; i++)
453                 trees[i]->tmp_flag = 0;
454         return j;
455 }
456
457 static int
458 dentry_append_to_list(struct wim_dentry *dentry, void *_dentry_list)
459 {
460         struct list_head *dentry_list = _dentry_list;
461         list_add_tail(&dentry->d_extraction_list_node, dentry_list);
462         return 0;
463 }
464
465 static void
466 dentry_reset_extraction_list_node(struct wim_dentry *dentry)
467 {
468         dentry->d_extraction_list_node = (struct list_head){NULL, NULL};
469 }
470
471 static int
472 dentry_delete_from_list(struct wim_dentry *dentry, void *_ignore)
473 {
474         list_del(&dentry->d_extraction_list_node);
475         dentry_reset_extraction_list_node(dentry);
476         return 0;
477 }
478
479 /*
480  * Build the preliminary list of dentries to be extracted.
481  *
482  * The list maintains the invariant that if d1 and d2 are in the list and d1 is
483  * an ancestor of d2, then d1 appears before d2 in the list.
484  */
485 static void
486 build_dentry_list(struct list_head *dentry_list, struct wim_dentry **trees,
487                   size_t num_trees, bool add_ancestors)
488 {
489         INIT_LIST_HEAD(dentry_list);
490
491         /* Add the trees recursively.  */
492         for (size_t i = 0; i < num_trees; i++)
493                 for_dentry_in_tree(trees[i], dentry_append_to_list, dentry_list);
494
495         /* If requested, add ancestors of the trees.  */
496         if (add_ancestors) {
497                 for (size_t i = 0; i < num_trees; i++) {
498                         struct wim_dentry *dentry = trees[i];
499                         struct wim_dentry *ancestor;
500                         struct list_head *place_after;
501
502                         if (dentry_is_root(dentry))
503                                 continue;
504
505                         place_after = dentry_list;
506                         ancestor = dentry;
507                         do {
508                                 ancestor = ancestor->parent;
509                                 if (will_extract_dentry(ancestor)) {
510                                         place_after = &ancestor->d_extraction_list_node;
511                                         break;
512                                 }
513                         } while (!dentry_is_root(ancestor));
514
515                         ancestor = dentry;
516                         do {
517                                 ancestor = ancestor->parent;
518                                 if (will_extract_dentry(ancestor))
519                                         break;
520                                 list_add(&ancestor->d_extraction_list_node, place_after);
521                         } while (!dentry_is_root(ancestor));
522                 }
523         }
524 }
525
526 static void
527 destroy_dentry_list(struct list_head *dentry_list)
528 {
529         struct wim_dentry *dentry, *tmp;
530         struct wim_inode *inode;
531
532         list_for_each_entry_safe(dentry, tmp, dentry_list, d_extraction_list_node) {
533                 inode = dentry->d_inode;
534                 dentry_reset_extraction_list_node(dentry);
535                 inode->i_visited = 0;
536                 if ((void *)dentry->d_extraction_name != (void *)dentry->file_name)
537                         FREE(dentry->d_extraction_name);
538                 dentry->d_extraction_name = NULL;
539                 dentry->d_extraction_name_nchars = 0;
540         }
541 }
542
543 static void
544 destroy_stream_list(struct list_head *stream_list)
545 {
546         struct wim_lookup_table_entry *lte;
547
548         list_for_each_entry(lte, stream_list, extraction_list)
549                 if (lte->out_refcnt > ARRAY_LEN(lte->inline_stream_owners))
550                         FREE(lte->stream_owners);
551 }
552
553 #ifdef __WIN32__
554 static const utf16lechar replacement_char = cpu_to_le16(0xfffd);
555 #else
556 static const utf16lechar replacement_char = cpu_to_le16('?');
557 #endif
558
559 static bool
560 file_name_valid(utf16lechar *name, size_t num_chars, bool fix)
561 {
562         size_t i;
563
564         if (num_chars == 0)
565                 return true;
566         for (i = 0; i < num_chars; i++) {
567                 switch (name[i]) {
568         #ifdef __WIN32__
569                 case cpu_to_le16('\\'):
570                 case cpu_to_le16(':'):
571                 case cpu_to_le16('*'):
572                 case cpu_to_le16('?'):
573                 case cpu_to_le16('"'):
574                 case cpu_to_le16('<'):
575                 case cpu_to_le16('>'):
576                 case cpu_to_le16('|'):
577         #endif
578                 case cpu_to_le16('/'):
579                 case cpu_to_le16('\0'):
580                         if (fix)
581                                 name[i] = replacement_char;
582                         else
583                                 return false;
584                 }
585         }
586
587 #ifdef __WIN32__
588         if (name[num_chars - 1] == cpu_to_le16(' ') ||
589             name[num_chars - 1] == cpu_to_le16('.'))
590         {
591                 if (fix)
592                         name[num_chars - 1] = replacement_char;
593                 else
594                         return false;
595         }
596 #endif
597         return true;
598 }
599
600 static int
601 dentry_calculate_extraction_name(struct wim_dentry *dentry,
602                                  struct apply_ctx *ctx)
603 {
604         int ret;
605
606         if (!dentry_is_supported(dentry, &ctx->supported_features))
607                 goto skip_dentry;
608
609         if (dentry_is_root(dentry))
610                 return 0;
611
612         if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
613                 dentry->d_extraction_name = dentry->file_name;
614                 dentry->d_extraction_name_nchars = dentry->file_name_nbytes /
615                                                    sizeof(utf16lechar);
616                 return 0;
617         }
618
619         if (!ctx->supported_features.case_sensitive_filenames) {
620                 struct wim_dentry *other;
621                 list_for_each_entry(other, &dentry->d_ci_conflict_list,
622                                     d_ci_conflict_list)
623                 {
624                         if (will_extract_dentry(other)) {
625                                 if (ctx->extract_flags &
626                                     WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS) {
627                                         WARNING("\"%"TS"\" has the same "
628                                                 "case-insensitive name as "
629                                                 "\"%"TS"\"; extracting "
630                                                 "dummy name instead",
631                                                 dentry_full_path(dentry),
632                                                 dentry_full_path(other));
633                                         goto out_replace;
634                                 } else {
635                                         WARNING("Not extracting \"%"TS"\": "
636                                                 "has same case-insensitive "
637                                                 "name as \"%"TS"\"",
638                                                 dentry_full_path(dentry),
639                                                 dentry_full_path(other));
640                                         goto skip_dentry;
641                                 }
642                         }
643                 }
644         }
645
646         if (file_name_valid(dentry->file_name, dentry->file_name_nbytes / 2, false)) {
647                 ret = utf16le_get_tstr(dentry->file_name,
648                                        dentry->file_name_nbytes,
649                                        (const tchar **)&dentry->d_extraction_name,
650                                        &dentry->d_extraction_name_nchars);
651                 dentry->d_extraction_name_nchars /= sizeof(tchar);
652                 return ret;
653         } else {
654                 if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES)
655                 {
656                         WARNING("\"%"TS"\" has an invalid filename "
657                                 "that is not supported on this platform; "
658                                 "extracting dummy name instead",
659                                 dentry_full_path(dentry));
660                         goto out_replace;
661                 } else {
662                         WARNING("Not extracting \"%"TS"\": has an invalid filename "
663                                 "that is not supported on this platform",
664                                 dentry_full_path(dentry));
665                         goto skip_dentry;
666                 }
667         }
668
669 out_replace:
670         {
671                 utf16lechar utf16_name_copy[dentry->file_name_nbytes / 2];
672
673                 memcpy(utf16_name_copy, dentry->file_name, dentry->file_name_nbytes);
674                 file_name_valid(utf16_name_copy, dentry->file_name_nbytes / 2, true);
675
676                 const tchar *tchar_name;
677                 size_t tchar_nchars;
678
679                 ret = utf16le_get_tstr(utf16_name_copy,
680                                        dentry->file_name_nbytes,
681                                        &tchar_name, &tchar_nchars);
682                 if (ret)
683                         return ret;
684
685                 tchar_nchars /= sizeof(tchar);
686
687                 size_t fixed_name_num_chars = tchar_nchars;
688                 tchar fixed_name[tchar_nchars + 50];
689
690                 tmemcpy(fixed_name, tchar_name, tchar_nchars);
691                 fixed_name_num_chars += tsprintf(fixed_name + tchar_nchars,
692                                                  T(" (invalid filename #%lu)"),
693                                                  ++ctx->invalid_sequence);
694
695                 utf16le_put_tstr(tchar_name);
696
697                 dentry->d_extraction_name = memdup(fixed_name,
698                                                    2 * fixed_name_num_chars + 2);
699                 if (!dentry->d_extraction_name)
700                         return WIMLIB_ERR_NOMEM;
701                 dentry->d_extraction_name_nchars = fixed_name_num_chars;
702         }
703         return 0;
704
705 skip_dentry:
706         for_dentry_in_tree(dentry, dentry_delete_from_list, NULL);
707         return 0;
708 }
709
710 /*
711  * Calculate the actual filename component at which each WIM dentry will be
712  * extracted, with special handling for dentries that are unsupported by the
713  * extraction backend or have invalid names.
714  *
715  * ctx->supported_features must be filled in.
716  *
717  * Possible error codes: WIMLIB_ERR_NOMEM, WIMLIB_ERR_INVALID_UTF16_STRING
718  */
719 static int
720 dentry_list_calculate_extraction_names(struct list_head *dentry_list,
721                                        struct apply_ctx *ctx)
722 {
723         struct list_head *prev, *cur;
724
725         /* Can't use list_for_each_entry() because a call to
726          * dentry_calculate_extraction_name() may delete the current dentry and
727          * its children from the list.  */
728
729         prev = dentry_list;
730         for (;;) {
731                 struct wim_dentry *dentry;
732                 int ret;
733
734                 cur = prev->next;
735                 if (cur == dentry_list)
736                         break;
737
738                 dentry = list_entry(cur, struct wim_dentry, d_extraction_list_node);
739
740                 ret = dentry_calculate_extraction_name(dentry, ctx);
741                 if (ret)
742                         return ret;
743
744                 if (prev->next == cur)
745                         prev = cur;
746                 else
747                         ; /* Current dentry and its children (which follow in
748                              the list) were deleted.  prev stays the same.  */
749         }
750         return 0;
751 }
752
753 static int
754 dentry_resolve_streams(struct wim_dentry *dentry, int extract_flags,
755                        struct wim_lookup_table *lookup_table)
756 {
757         struct wim_inode *inode = dentry->d_inode;
758         struct wim_lookup_table_entry *lte;
759         int ret;
760         bool force = false;
761
762         /* Special case:  when extracting from a pipe, the WIM lookup table is
763          * initially empty, so "resolving" an inode's streams is initially not
764          * possible.  However, we still need to keep track of which streams,
765          * identified by SHA1 message digests, need to be extracted, so we
766          * "resolve" the inode's streams anyway by allocating new entries.  */
767         if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE)
768                 force = true;
769         ret = inode_resolve_streams(inode, lookup_table, force);
770         if (ret)
771                 return ret;
772         for (u32 i = 0; i <= inode->i_num_ads; i++) {
773                 lte = inode_stream_lte_resolved(inode, i);
774                 if (lte)
775                         lte->out_refcnt = 0;
776         }
777         return 0;
778 }
779
780 /*
781  * For each dentry to be extracted, resolve all streams in the corresponding
782  * inode and set 'out_refcnt' in each to 0.
783  *
784  * Possible error codes: WIMLIB_ERR_RESOURCE_NOT_FOUND, WIMLIB_ERR_NOMEM.
785  */
786 static int
787 dentry_list_resolve_streams(struct list_head *dentry_list,
788                             struct apply_ctx *ctx)
789 {
790         struct wim_dentry *dentry;
791         int ret;
792
793         list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
794                 ret = dentry_resolve_streams(dentry,
795                                              ctx->extract_flags,
796                                              ctx->wim->lookup_table);
797                 if (ret)
798                         return ret;
799         }
800         return 0;
801 }
802
803 static int
804 ref_stream(struct wim_lookup_table_entry *lte, u32 stream_idx,
805            struct wim_dentry *dentry, struct apply_ctx *ctx)
806 {
807         struct wim_inode *inode = dentry->d_inode;
808         struct stream_owner *stream_owners;
809
810         if (!lte)
811                 return 0;
812
813         /* Tally the size only for each extraction of the stream (not hard
814          * links).  */
815         if (inode->i_visited && ctx->supported_features.hard_links)
816                 return 0;
817
818         ctx->progress.extract.total_bytes += lte->size;
819         ctx->progress.extract.num_streams++;
820
821         if (inode->i_visited)
822                 return 0;
823
824         /* Add stream to the dentry_list only one time, even if it's going
825          * to be extracted to multiple inodes.  */
826         if (lte->out_refcnt == 0) {
827                 list_add_tail(&lte->extraction_list, &ctx->stream_list);
828                 ctx->num_streams_remaining++;
829         }
830
831         /* If inode not yet been visited, append it to the stream_owners array.  */
832         if (lte->out_refcnt < ARRAY_LEN(lte->inline_stream_owners)) {
833                 stream_owners = lte->inline_stream_owners;
834         } else {
835                 struct stream_owner *prev_stream_owners;
836                 size_t alloc_stream_owners;
837
838                 if (lte->out_refcnt == ARRAY_LEN(lte->inline_stream_owners)) {
839                         prev_stream_owners = NULL;
840                         alloc_stream_owners = ARRAY_LEN(lte->inline_stream_owners);
841                 } else {
842                         prev_stream_owners = lte->stream_owners;
843                         alloc_stream_owners = lte->alloc_stream_owners;
844                 }
845
846                 if (lte->out_refcnt == alloc_stream_owners) {
847                         alloc_stream_owners *= 2;
848                         stream_owners = REALLOC(prev_stream_owners,
849                                                alloc_stream_owners *
850                                                 sizeof(stream_owners[0]));
851                         if (!stream_owners)
852                                 return WIMLIB_ERR_NOMEM;
853                         if (!prev_stream_owners) {
854                                 memcpy(stream_owners,
855                                        lte->inline_stream_owners,
856                                        sizeof(lte->inline_stream_owners));
857                         }
858                         lte->stream_owners = stream_owners;
859                         lte->alloc_stream_owners = alloc_stream_owners;
860                 }
861                 stream_owners = lte->stream_owners;
862         }
863         stream_owners[lte->out_refcnt].inode = inode;
864         if (stream_idx == 0) {
865                 stream_owners[lte->out_refcnt].stream_name = NULL;
866         } else {
867                 stream_owners[lte->out_refcnt].stream_name =
868                         inode->i_ads_entries[stream_idx - 1].stream_name;
869         }
870         lte->out_refcnt++;
871         return 0;
872 }
873
874 static int
875 dentry_ref_streams(struct wim_dentry *dentry, struct apply_ctx *ctx)
876 {
877         struct wim_inode *inode = dentry->d_inode;
878         int ret;
879
880         /* The unnamed data stream will always be extracted, except in an
881          * unlikely case.  */
882         if (!inode_is_encrypted_directory(inode)) {
883                 u16 stream_idx;
884                 struct wim_lookup_table_entry *stream;
885
886                 stream = inode_unnamed_stream_resolved(inode, &stream_idx);
887                 ret = ref_stream(stream, stream_idx, dentry, ctx);
888                 if (ret)
889                         return ret;
890         }
891
892         /* Named data streams will be extracted only if supported in the current
893          * extraction mode and volume, and to avoid complications, if not doing
894          * a linked extraction.  */
895         if (ctx->supported_features.named_data_streams) {
896                 for (u16 i = 0; i < inode->i_num_ads; i++) {
897                         if (!ads_entry_is_named_stream(&inode->i_ads_entries[i]))
898                                 continue;
899                         ret = ref_stream(inode->i_ads_entries[i].lte, i + 1,
900                                          dentry, ctx);
901                         if (ret)
902                                 return ret;
903                 }
904         }
905         inode->i_visited = 1;
906         return 0;
907 }
908
909 /*
910  * For each dentry to be extracted, iterate through the data streams of the
911  * corresponding inode.  For each such stream that is not to be ignored due to
912  * the supported features or extraction flags, add it to the list of streams to
913  * be extracted (ctx->stream_list) if not already done so.
914  *
915  * Also builds a mapping from each stream to the inodes referencing it.
916  *
917  * This also initializes the extract progress info with byte and stream
918  * information.
919  *
920  * ctx->supported_features must be filled in.
921  *
922  * Possible error codes: WIMLIB_ERR_NOMEM.
923  */
924 static int
925 dentry_list_ref_streams(struct list_head *dentry_list, struct apply_ctx *ctx)
926 {
927         struct wim_dentry *dentry;
928         int ret;
929
930         list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
931                 ret = dentry_ref_streams(dentry, ctx);
932                 if (ret)
933                         return ret;
934         }
935         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
936                 dentry->d_inode->i_visited = 0;
937         return 0;
938 }
939
940 static void
941 dentry_list_build_inode_alias_lists(struct list_head *dentry_list)
942 {
943         struct wim_dentry *dentry;
944         struct wim_inode *inode;
945
946         list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
947                 inode = dentry->d_inode;
948                 if (!inode->i_visited)
949                         INIT_LIST_HEAD(&inode->i_extraction_aliases);
950                 list_add_tail(&dentry->d_extraction_alias_node,
951                               &inode->i_extraction_aliases);
952                 inode->i_visited = 1;
953         }
954         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
955                 dentry->d_inode->i_visited = 0;
956 }
957
958 static void
959 inode_tally_features(const struct wim_inode *inode,
960                      struct wim_features *features)
961 {
962         if (inode->i_attributes & FILE_ATTRIBUTE_ARCHIVE)
963                 features->archive_files++;
964         if (inode->i_attributes & FILE_ATTRIBUTE_HIDDEN)
965                 features->hidden_files++;
966         if (inode->i_attributes & FILE_ATTRIBUTE_SYSTEM)
967                 features->system_files++;
968         if (inode->i_attributes & FILE_ATTRIBUTE_COMPRESSED)
969                 features->compressed_files++;
970         if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) {
971                 if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)
972                         features->encrypted_directories++;
973                 else
974                         features->encrypted_files++;
975         }
976         if (inode->i_attributes & FILE_ATTRIBUTE_NOT_CONTENT_INDEXED)
977                 features->not_context_indexed_files++;
978         if (inode->i_attributes & FILE_ATTRIBUTE_SPARSE_FILE)
979                 features->sparse_files++;
980         if (inode_has_named_stream(inode))
981                 features->named_data_streams++;
982         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
983                 features->reparse_points++;
984                 if (inode_is_symlink(inode))
985                         features->symlink_reparse_points++;
986                 else
987                         features->other_reparse_points++;
988         }
989         if (inode->i_security_id != -1)
990                 features->security_descriptors++;
991         if (inode_has_unix_data(inode))
992                 features->unix_data++;
993 }
994
995 /* Tally features necessary to extract a dentry and the corresponding inode.  */
996 static void
997 dentry_tally_features(struct wim_dentry *dentry, struct wim_features *features)
998 {
999         struct wim_inode *inode = dentry->d_inode;
1000
1001         if (dentry_has_short_name(dentry))
1002                 features->short_names++;
1003
1004         if (inode->i_visited) {
1005                 features->hard_links++;
1006         } else {
1007                 inode_tally_features(inode, features);
1008                 inode->i_visited = 1;
1009         }
1010 }
1011
1012 /* Tally the features necessary to extract the specified dentries.  */
1013 static void
1014 dentry_list_get_features(struct list_head *dentry_list,
1015                          struct wim_features *features)
1016 {
1017         struct wim_dentry *dentry;
1018
1019         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
1020                 dentry_tally_features(dentry, features);
1021
1022         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
1023                 dentry->d_inode->i_visited = 0;
1024 }
1025
1026 static int
1027 do_feature_check(const struct wim_features *required_features,
1028                  const struct wim_features *supported_features,
1029                  int extract_flags)
1030 {
1031         /* File attributes.  */
1032         if (!(extract_flags & WIMLIB_EXTRACT_FLAG_NO_ATTRIBUTES)) {
1033                 /* Note: Don't bother the user about FILE_ATTRIBUTE_ARCHIVE.
1034                  * We're an archive program, so theoretically we can do what we
1035                  * want with it.  */
1036
1037                 if (required_features->hidden_files &&
1038                     !supported_features->hidden_files)
1039                         WARNING("Ignoring FILE_ATTRIBUTE_HIDDEN of %lu files",
1040                                 required_features->hidden_files);
1041
1042                 if (required_features->system_files &&
1043                     !supported_features->system_files)
1044                         WARNING("Ignoring FILE_ATTRIBUTE_SYSTEM of %lu files",
1045                                 required_features->system_files);
1046
1047                 if (required_features->compressed_files &&
1048                     !supported_features->compressed_files)
1049                         WARNING("Ignoring FILE_ATTRIBUTE_COMPRESSED of %lu files",
1050                                 required_features->compressed_files);
1051
1052                 if (required_features->not_context_indexed_files &&
1053                     !supported_features->not_context_indexed_files)
1054                         WARNING("Ignoring FILE_ATTRIBUTE_NOT_CONTENT_INDEXED of %lu files",
1055                                 required_features->not_context_indexed_files);
1056
1057                 if (required_features->sparse_files &&
1058                     !supported_features->sparse_files)
1059                         WARNING("Ignoring FILE_ATTRIBUTE_SPARSE_FILE of %lu files",
1060                                 required_features->sparse_files);
1061
1062                 if (required_features->encrypted_directories &&
1063                     !supported_features->encrypted_directories)
1064                         WARNING("Ignoring FILE_ATTRIBUTE_ENCRYPTED of %lu directories",
1065                                 required_features->encrypted_directories);
1066         }
1067
1068         /* Encrypted files.  */
1069         if (required_features->encrypted_files &&
1070             !supported_features->encrypted_files)
1071                 WARNING("Ignoring %lu encrypted files",
1072                         required_features->encrypted_files);
1073
1074         /* Named data streams.  */
1075         if (required_features->named_data_streams &&
1076             (!supported_features->named_data_streams))
1077                 WARNING("Ignoring named data streams of %lu files",
1078                         required_features->named_data_streams);
1079
1080         /* Hard links.  */
1081         if (required_features->hard_links && !supported_features->hard_links)
1082                 WARNING("Extracting %lu hard links as independent files",
1083                         required_features->hard_links);
1084
1085         /* Symbolic links and reparse points.  */
1086         if ((extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_SYMLINKS) &&
1087             required_features->symlink_reparse_points &&
1088             !supported_features->symlink_reparse_points &&
1089             !supported_features->reparse_points)
1090         {
1091                 ERROR("Extraction backend does not support symbolic links!");
1092                 return WIMLIB_ERR_UNSUPPORTED;
1093         }
1094         if (required_features->reparse_points &&
1095             !supported_features->reparse_points)
1096         {
1097                 if (supported_features->symlink_reparse_points) {
1098                         if (required_features->other_reparse_points) {
1099                                 WARNING("Ignoring %lu non-symlink/junction "
1100                                         "reparse point files",
1101                                         required_features->other_reparse_points);
1102                         }
1103                 } else {
1104                         WARNING("Ignoring %lu reparse point files",
1105                                 required_features->reparse_points);
1106                 }
1107         }
1108
1109         /* Security descriptors.  */
1110         if (((extract_flags & (WIMLIB_EXTRACT_FLAG_STRICT_ACLS |
1111                                WIMLIB_EXTRACT_FLAG_UNIX_DATA))
1112              == WIMLIB_EXTRACT_FLAG_STRICT_ACLS) &&
1113             required_features->security_descriptors &&
1114             !supported_features->security_descriptors)
1115         {
1116                 ERROR("Extraction backend does not support security descriptors!");
1117                 return WIMLIB_ERR_UNSUPPORTED;
1118         }
1119         if (!(extract_flags & WIMLIB_EXTRACT_FLAG_NO_ACLS) &&
1120             required_features->security_descriptors &&
1121             !supported_features->security_descriptors)
1122                 WARNING("Ignoring Windows NT security descriptors of %lu files",
1123                         required_features->security_descriptors);
1124
1125         /* UNIX data.  */
1126         if ((extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) &&
1127             required_features->unix_data && !supported_features->unix_data)
1128         {
1129                 ERROR("Extraction backend does not support UNIX data!");
1130                 return WIMLIB_ERR_UNSUPPORTED;
1131         }
1132
1133         /* DOS Names.  */
1134         if (required_features->short_names &&
1135             !supported_features->short_names)
1136         {
1137                 if (extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_SHORT_NAMES) {
1138                         ERROR("Extraction backend does not support DOS names!");
1139                         return WIMLIB_ERR_UNSUPPORTED;
1140                 }
1141                 WARNING("Ignoring DOS names of %lu files",
1142                         required_features->short_names);
1143         }
1144
1145         /* Timestamps.  */
1146         if ((extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_TIMESTAMPS) &&
1147             !supported_features->timestamps)
1148         {
1149                 ERROR("Extraction backend does not support timestamps!");
1150                 return WIMLIB_ERR_UNSUPPORTED;
1151         }
1152
1153         return 0;
1154 }
1155
1156 static const struct apply_operations *
1157 select_apply_operations(int extract_flags)
1158 {
1159 #ifdef WITH_NTFS_3G
1160         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS)
1161                 return &ntfs_3g_apply_ops;
1162 #endif
1163 #ifdef __WIN32__
1164         return &win32_apply_ops;
1165 #else
1166         return &unix_apply_ops;
1167 #endif
1168 }
1169
1170 static int
1171 extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees,
1172               const tchar *target, int extract_flags,
1173               wimlib_progress_func_t progress_func)
1174 {
1175         const struct apply_operations *ops;
1176         struct apply_ctx *ctx;
1177         int ret;
1178         LIST_HEAD(dentry_list);
1179
1180         if (extract_flags & WIMLIB_EXTRACT_FLAG_TO_STDOUT) {
1181                 ret = extract_dentries_to_stdout(trees, num_trees,
1182                                                  wim->lookup_table);
1183                 goto out;
1184         }
1185
1186         num_trees = remove_duplicate_trees(trees, num_trees);
1187         num_trees = remove_contained_trees(trees, num_trees);
1188
1189         ops = select_apply_operations(extract_flags);
1190
1191         if (num_trees > 1 && ops->single_tree_only) {
1192                 ERROR("Extracting multiple directory trees "
1193                       "at once is not supported in %s extraction mode!",
1194                       ops->name);
1195                 ret = WIMLIB_ERR_UNSUPPORTED;
1196                 goto out;
1197         }
1198
1199         ctx = CALLOC(1, ops->context_size);
1200         if (!ctx) {
1201                 ret = WIMLIB_ERR_NOMEM;
1202                 goto out;
1203         }
1204
1205         ctx->wim = wim;
1206         ctx->target = target;
1207         ctx->target_nchars = tstrlen(target);
1208         ctx->extract_flags = extract_flags;
1209         if (progress_func) {
1210                 ctx->progress_func = progress_func;
1211                 ctx->progress.extract.image = wim->current_image;
1212                 ctx->progress.extract.extract_flags = (extract_flags &
1213                                                        WIMLIB_EXTRACT_MASK_PUBLIC);
1214                 ctx->progress.extract.wimfile_name = wim->filename;
1215                 ctx->progress.extract.image_name = wimlib_get_image_name(wim,
1216                                                                          wim->current_image);
1217                 ctx->progress.extract.target = target;
1218         }
1219         INIT_LIST_HEAD(&ctx->stream_list);
1220
1221         ret = (*ops->get_supported_features)(target, &ctx->supported_features);
1222         if (ret)
1223                 goto out_cleanup;
1224
1225         build_dentry_list(&dentry_list, trees, num_trees,
1226                           !(extract_flags &
1227                             WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE));
1228
1229         dentry_list_get_features(&dentry_list, &ctx->required_features);
1230
1231         ret = do_feature_check(&ctx->required_features, &ctx->supported_features,
1232                                ctx->extract_flags);
1233         if (ret)
1234                 goto out_cleanup;
1235
1236         ret = dentry_list_calculate_extraction_names(&dentry_list, ctx);
1237         if (ret)
1238                 goto out_cleanup;
1239
1240         ret = dentry_list_resolve_streams(&dentry_list, ctx);
1241         if (ret)
1242                 goto out_cleanup;
1243
1244         ret = dentry_list_ref_streams(&dentry_list, ctx);
1245         if (ret)
1246                 goto out_cleanup;
1247
1248         dentry_list_build_inode_alias_lists(&dentry_list);
1249
1250         if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
1251                 /* When extracting from a pipe, the number of bytes of data to
1252                  * extract can't be determined in the normal way (examining the
1253                  * lookup table), since at this point all we have is a set of
1254                  * SHA1 message digests of streams that need to be extracted.
1255                  * However, we can get a reasonably accurate estimate by taking
1256                  * <TOTALBYTES> from the corresponding <IMAGE> in the WIM XML
1257                  * data.  This does assume that a full image is being extracted,
1258                  * but currently there is no API for doing otherwise.  (Also,
1259                  * subtract <HARDLINKBYTES> from this if hard links are
1260                  * supported by the extraction mode.)  */
1261                 ctx->progress.extract.total_bytes =
1262                         wim_info_get_image_total_bytes(wim->wim_info,
1263                                                        wim->current_image);
1264                 if (ctx->supported_features.hard_links) {
1265                         ctx->progress.extract.total_bytes -=
1266                                 wim_info_get_image_hard_link_bytes(wim->wim_info,
1267                                                                    wim->current_image);
1268                 }
1269         }
1270
1271         if (ctx->progress_func) {
1272                 int msg;
1273                 if (extract_flags & WIMLIB_EXTRACT_FLAG_IMAGEMODE)
1274                         msg = WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN;
1275                 else
1276                         msg = WIMLIB_PROGRESS_MSG_EXTRACT_TREE_BEGIN;
1277                 (*ctx->progress_func)(msg, &ctx->progress);
1278         }
1279
1280         ret = (*ops->extract)(&dentry_list, ctx);
1281         if (ret)
1282                 goto out_cleanup;
1283
1284         if (ctx->progress_func &&
1285             ctx->progress.extract.completed_bytes <
1286                 ctx->progress.extract.total_bytes)
1287         {
1288                 ctx->progress.extract.completed_bytes =
1289                         ctx->progress.extract.total_bytes;
1290                 (*ctx->progress_func)(WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS,
1291                                       &ctx->progress);
1292         }
1293
1294         if (ctx->progress_func) {
1295                 int msg;
1296                 if (extract_flags & WIMLIB_EXTRACT_FLAG_IMAGEMODE)
1297                         msg = WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END;
1298                 else
1299                         msg = WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END;
1300                 (*ctx->progress_func)(msg, &ctx->progress);
1301         }
1302         ret = 0;
1303 out_cleanup:
1304         destroy_stream_list(&ctx->stream_list);
1305         destroy_dentry_list(&dentry_list);
1306         FREE(ctx);
1307 out:
1308         return ret;
1309 }
1310
1311 static int
1312 mkdir_if_needed(const tchar *target)
1313 {
1314         struct stat stbuf;
1315         if (tstat(target, &stbuf)) {
1316                 if (errno == ENOENT) {
1317                         if (tmkdir(target, 0755)) {
1318                                 ERROR_WITH_ERRNO("Failed to create directory "
1319                                                  "\"%"TS"\"", target);
1320                                 return WIMLIB_ERR_MKDIR;
1321                         }
1322                 } else {
1323                         ERROR_WITH_ERRNO("Failed to stat \"%"TS"\"", target);
1324                         return WIMLIB_ERR_STAT;
1325                 }
1326         } else if (!S_ISDIR(stbuf.st_mode)) {
1327                 ERROR("\"%"TS"\" is not a directory", target);
1328                 return WIMLIB_ERR_NOTDIR;
1329         }
1330         return 0;
1331 }
1332
1333 /* Make sure the extraction flags make sense, and update them if needed.  */
1334 static int
1335 check_extract_flags(const WIMStruct *wim, int *extract_flags_p)
1336 {
1337         int extract_flags = *extract_flags_p;
1338
1339         /* Check for invalid flag combinations  */
1340
1341         if ((extract_flags &
1342              (WIMLIB_EXTRACT_FLAG_NO_ACLS |
1343               WIMLIB_EXTRACT_FLAG_STRICT_ACLS)) == (WIMLIB_EXTRACT_FLAG_NO_ACLS |
1344                                                     WIMLIB_EXTRACT_FLAG_STRICT_ACLS))
1345                 return WIMLIB_ERR_INVALID_PARAM;
1346
1347         if ((extract_flags &
1348              (WIMLIB_EXTRACT_FLAG_RPFIX |
1349               WIMLIB_EXTRACT_FLAG_NORPFIX)) == (WIMLIB_EXTRACT_FLAG_RPFIX |
1350                                                 WIMLIB_EXTRACT_FLAG_NORPFIX))
1351                 return WIMLIB_ERR_INVALID_PARAM;
1352
1353 #ifndef WITH_NTFS_3G
1354         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
1355                 ERROR("wimlib was compiled without support for NTFS-3g, so\n"
1356                       "        it cannot apply a WIM image directly to an NTFS volume.");
1357                 return WIMLIB_ERR_UNSUPPORTED;
1358         }
1359 #endif
1360
1361 #ifndef __WIN32__
1362         if (extract_flags & WIMLIB_EXTRACT_FLAG_WIMBOOT) {
1363                 ERROR("WIMBoot extraction is only supported on Windows!");
1364                 return WIMLIB_ERR_UNSUPPORTED;
1365         }
1366 #endif
1367
1368         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
1369                               WIMLIB_EXTRACT_FLAG_NORPFIX |
1370                               WIMLIB_EXTRACT_FLAG_IMAGEMODE)) ==
1371                                         WIMLIB_EXTRACT_FLAG_IMAGEMODE)
1372         {
1373                 /* For full-image extraction, do reparse point fixups by default
1374                  * if the WIM header says they are enabled.  */
1375                 if (wim->hdr.flags & WIM_HDR_FLAG_RP_FIX)
1376                         extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX;
1377         }
1378
1379         *extract_flags_p = extract_flags;
1380         return 0;
1381 }
1382
1383 static u32
1384 get_wildcard_flags(int extract_flags)
1385 {
1386         u32 wildcard_flags = 0;
1387
1388         if (extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_GLOB)
1389                 wildcard_flags |= WILDCARD_FLAG_ERROR_IF_NO_MATCH;
1390         else
1391                 wildcard_flags |= WILDCARD_FLAG_WARN_IF_NO_MATCH;
1392
1393         if (default_ignore_case)
1394                 wildcard_flags |= WILDCARD_FLAG_CASE_INSENSITIVE;
1395
1396         return wildcard_flags;
1397 }
1398
/* Growable array of dentry pointers, filled in by append_dentry_cb().  */
struct append_dentry_ctx {
        /* Heap array of collected dentries; NULL while nothing has been
         * allocated.  */
        struct wim_dentry **dentries;
        /* Number of entries of 'dentries' currently in use.  */
        size_t num_dentries;
        /* Number of entries 'dentries' has room for.  */
        size_t num_alloc_dentries;
};
1404
1405 static int
1406 append_dentry_cb(struct wim_dentry *dentry, void *_ctx)
1407 {
1408         struct append_dentry_ctx *ctx = _ctx;
1409
1410         if (ctx->num_dentries == ctx->num_alloc_dentries) {
1411                 struct wim_dentry **new_dentries;
1412                 size_t new_length;
1413
1414                 new_length = max(ctx->num_alloc_dentries + 8,
1415                                  ctx->num_alloc_dentries * 3 / 2);
1416                 new_dentries = REALLOC(ctx->dentries,
1417                                        new_length * sizeof(ctx->dentries[0]));
1418                 if (new_dentries == NULL)
1419                         return WIMLIB_ERR_NOMEM;
1420                 ctx->dentries = new_dentries;
1421                 ctx->num_alloc_dentries = new_length;
1422         }
1423         ctx->dentries[ctx->num_dentries++] = dentry;
1424         return 0;
1425 }
1426
1427 static int
1428 do_wimlib_extract_paths(WIMStruct *wim, int image, const tchar *target,
1429                         const tchar * const *paths, size_t num_paths,
1430                         int extract_flags, wimlib_progress_func_t progress_func)
1431 {
1432         int ret;
1433         struct wim_dentry **trees;
1434         size_t num_trees;
1435
1436         if (wim == NULL || target == NULL || target[0] == T('\0') ||
1437             (num_paths != 0 && paths == NULL))
1438                 return WIMLIB_ERR_INVALID_PARAM;
1439
1440         ret = check_extract_flags(wim, &extract_flags);
1441         if (ret)
1442                 return ret;
1443
1444         ret = select_wim_image(wim, image);
1445         if (ret)
1446                 return ret;
1447
1448         ret = wim_checksum_unhashed_streams(wim);
1449         if (ret)
1450                 return ret;
1451
1452         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_NTFS |
1453                               WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE)) ==
1454             (WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE))
1455         {
1456                 ret = mkdir_if_needed(target);
1457                 if (ret)
1458                         return ret;
1459         }
1460
1461         if (extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS) {
1462
1463                 struct append_dentry_ctx append_dentry_ctx = {
1464                         .dentries = NULL,
1465                         .num_dentries = 0,
1466                         .num_alloc_dentries = 0,
1467                 };
1468
1469                 u32 wildcard_flags = get_wildcard_flags(extract_flags);
1470
1471                 for (size_t i = 0; i < num_paths; i++) {
1472                         tchar *path = canonicalize_wim_path(paths[i]);
1473                         if (path == NULL) {
1474                                 ret = WIMLIB_ERR_NOMEM;
1475                                 trees = append_dentry_ctx.dentries;
1476                                 goto out_free_trees;
1477                         }
1478                         ret = expand_wildcard(wim, path,
1479                                               append_dentry_cb,
1480                                               &append_dentry_ctx,
1481                                               wildcard_flags);
1482                         FREE(path);
1483                         if (ret) {
1484                                 trees = append_dentry_ctx.dentries;
1485                                 goto out_free_trees;
1486                         }
1487                 }
1488                 trees = append_dentry_ctx.dentries;
1489                 num_trees = append_dentry_ctx.num_dentries;
1490         } else {
1491                 trees = MALLOC(num_paths * sizeof(trees[0]));
1492                 if (trees == NULL)
1493                         return WIMLIB_ERR_NOMEM;
1494
1495                 for (size_t i = 0; i < num_paths; i++) {
1496
1497                         tchar *path = canonicalize_wim_path(paths[i]);
1498                         if (path == NULL) {
1499                                 ret = WIMLIB_ERR_NOMEM;
1500                                 goto out_free_trees;
1501                         }
1502
1503                         trees[i] = get_dentry(wim, path,
1504                                               WIMLIB_CASE_PLATFORM_DEFAULT);
1505                         FREE(path);
1506                         if (trees[i] == NULL) {
1507                                   ERROR("Path \"%"TS"\" does not exist "
1508                                         "in WIM image %d",
1509                                         paths[i], wim->current_image);
1510                                   ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
1511                                   goto out_free_trees;
1512                         }
1513                 }
1514                 num_trees = num_paths;
1515         }
1516
1517         if (num_trees == 0) {
1518                 ret = 0;
1519                 goto out_free_trees;
1520         }
1521
1522         ret = extract_trees(wim, trees, num_trees,
1523                             target, extract_flags, progress_func);
1524 out_free_trees:
1525         FREE(trees);
1526         return ret;
1527 }
1528
1529 static int
1530 extract_single_image(WIMStruct *wim, int image,
1531                      const tchar *target, int extract_flags,
1532                      wimlib_progress_func_t progress_func)
1533 {
1534         const tchar *path = WIMLIB_WIM_ROOT_PATH;
1535         extract_flags |= WIMLIB_EXTRACT_FLAG_IMAGEMODE;
1536         return do_wimlib_extract_paths(wim, image, target, &path, 1,
1537                                        extract_flags, progress_func);
1538 }
1539
1540 static const tchar * const filename_forbidden_chars =
1541 T(
1542 #ifdef __WIN32__
1543 "<>:\"/\\|?*"
1544 #else
1545 "/"
1546 #endif
1547 );
1548
1549 /* This function checks if it is okay to use a WIM image's name as a directory
1550  * name.  */
1551 static bool
1552 image_name_ok_as_dir(const tchar *image_name)
1553 {
1554         return image_name && *image_name &&
1555                 !tstrpbrk(image_name, filename_forbidden_chars) &&
1556                 tstrcmp(image_name, T(".")) &&
1557                 tstrcmp(image_name, T(".."));
1558 }
1559
1560 /* Extracts all images from the WIM to the directory @target, with the images
1561  * placed in subdirectories named by their image names. */
1562 static int
1563 extract_all_images(WIMStruct *wim,
1564                    const tchar *target,
1565                    int extract_flags,
1566                    wimlib_progress_func_t progress_func)
1567 {
1568         size_t image_name_max_len = max(xml_get_max_image_name_len(wim), 20);
1569         size_t output_path_len = tstrlen(target);
1570         tchar buf[output_path_len + 1 + image_name_max_len + 1];
1571         int ret;
1572         int image;
1573         const tchar *image_name;
1574
1575         extract_flags |= WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
1576
1577         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
1578                 ERROR("Cannot extract multiple images in NTFS extraction mode.");
1579                 return WIMLIB_ERR_INVALID_PARAM;
1580         }
1581
1582         ret = mkdir_if_needed(target);
1583         if (ret)
1584                 return ret;
1585         tmemcpy(buf, target, output_path_len);
1586         buf[output_path_len] = OS_PREFERRED_PATH_SEPARATOR;
1587         for (image = 1; image <= wim->hdr.image_count; image++) {
1588                 image_name = wimlib_get_image_name(wim, image);
1589                 if (image_name_ok_as_dir(image_name)) {
1590                         tstrcpy(buf + output_path_len + 1, image_name);
1591                 } else {
1592                         /* Image name is empty or contains forbidden characters.
1593                          * Use image number instead. */
1594                         tsprintf(buf + output_path_len + 1, T("%d"), image);
1595                 }
1596                 ret = extract_single_image(wim, image, buf, extract_flags,
1597                                            progress_func);
1598                 if (ret)
1599                         return ret;
1600         }
1601         return 0;
1602 }
1603
1604 static int
1605 do_wimlib_extract_image(WIMStruct *wim,
1606                         int image,
1607                         const tchar *target,
1608                         int extract_flags,
1609                         wimlib_progress_func_t progress_func)
1610 {
1611         if (extract_flags & (WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE |
1612                              WIMLIB_EXTRACT_FLAG_TO_STDOUT |
1613                              WIMLIB_EXTRACT_FLAG_GLOB_PATHS))
1614                 return WIMLIB_ERR_INVALID_PARAM;
1615
1616         if (image == WIMLIB_ALL_IMAGES)
1617                 return extract_all_images(wim, target, extract_flags,
1618                                           progress_func);
1619         else
1620                 return extract_single_image(wim, image, target, extract_flags,
1621                                             progress_func);
1622 }
1623
1624
1625 /****************************************************************************
1626  *                          Extraction API                                  *
1627  ****************************************************************************/
1628
1629 WIMLIBAPI int
1630 wimlib_extract_paths(WIMStruct *wim, int image, const tchar *target,
1631                      const tchar * const *paths, size_t num_paths,
1632                      int extract_flags, wimlib_progress_func_t progress_func)
1633 {
1634         if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC)
1635                 return WIMLIB_ERR_INVALID_PARAM;
1636
1637         return do_wimlib_extract_paths(wim, image, target, paths, num_paths,
1638                                        extract_flags, progress_func);
1639 }
1640
1641 WIMLIBAPI int
1642 wimlib_extract_pathlist(WIMStruct *wim, int image, const tchar *target,
1643                         const tchar *path_list_file, int extract_flags,
1644                         wimlib_progress_func_t progress_func)
1645 {
1646         int ret;
1647         tchar **paths;
1648         size_t num_paths;
1649         void *mem;
1650
1651         ret = read_path_list_file(path_list_file, &paths, &num_paths, &mem);
1652         if (ret) {
1653                 ERROR("Failed to read path list file \"%"TS"\"",
1654                       path_list_file);
1655                 return ret;
1656         }
1657
1658         ret = wimlib_extract_paths(wim, image, target,
1659                                    (const tchar * const *)paths, num_paths,
1660                                    extract_flags, progress_func);
1661         FREE(paths);
1662         FREE(mem);
1663         return ret;
1664 }
1665
1666 WIMLIBAPI int
1667 wimlib_extract_image_from_pipe(int pipe_fd, const tchar *image_num_or_name,
1668                                const tchar *target, int extract_flags,
1669                                wimlib_progress_func_t progress_func)
1670 {
1671         int ret;
1672         WIMStruct *pwm;
1673         struct filedes *in_fd;
1674         int image;
1675         unsigned i;
1676
1677         if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC)
1678                 return WIMLIB_ERR_INVALID_PARAM;
1679
1680         /* Read the WIM header from the pipe and get a WIMStruct to represent
1681          * the pipable WIM.  Caveats:  Unlike getting a WIMStruct with
1682          * wimlib_open_wim(), getting a WIMStruct in this way will result in
1683          * an empty lookup table, no XML data read, and no filename set.  */
1684         ret = open_wim_as_WIMStruct(&pipe_fd,
1685                                     WIMLIB_OPEN_FLAG_FROM_PIPE,
1686                                     &pwm, progress_func);
1687         if (ret)
1688                 return ret;
1689
1690         /* Sanity check to make sure this is a pipable WIM.  */
1691         if (pwm->hdr.magic != PWM_MAGIC) {
1692                 ERROR("The WIM being read from file descriptor %d "
1693                       "is not pipable!", pipe_fd);
1694                 ret = WIMLIB_ERR_NOT_PIPABLE;
1695                 goto out_wimlib_free;
1696         }
1697
1698         /* Sanity check to make sure the first part of a pipable split WIM is
1699          * sent over the pipe first.  */
1700         if (pwm->hdr.part_number != 1) {
1701                 ERROR("The first part of the split WIM must be "
1702                       "sent over the pipe first.");
1703                 ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
1704                 goto out_wimlib_free;
1705         }
1706
1707         in_fd = &pwm->in_fd;
1708         wimlib_assert(in_fd->offset == WIM_HEADER_DISK_SIZE);
1709
1710         /* As mentioned, the WIMStruct we created from the pipe does not have
1711          * XML data yet.  Fix this by reading the extra copy of the XML data
1712          * that directly follows the header in pipable WIMs.  (Note: see
1713          * write_pipable_wim() for more details about the format of pipable
1714          * WIMs.)  */
1715         {
1716                 struct wim_lookup_table_entry xml_lte;
1717                 struct wim_resource_spec xml_rspec;
1718                 ret = read_pwm_stream_header(pwm, &xml_lte, &xml_rspec, 0, NULL);
1719                 if (ret)
1720                         goto out_wimlib_free;
1721
1722                 if (!(xml_lte.flags & WIM_RESHDR_FLAG_METADATA))
1723                 {
1724                         ERROR("Expected XML data, but found non-metadata "
1725                               "stream.");
1726                         ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
1727                         goto out_wimlib_free;
1728                 }
1729
1730                 wim_res_spec_to_hdr(&xml_rspec, &pwm->hdr.xml_data_reshdr);
1731
1732                 ret = read_wim_xml_data(pwm);
1733                 if (ret)
1734                         goto out_wimlib_free;
1735
1736                 if (wim_info_get_num_images(pwm->wim_info) != pwm->hdr.image_count) {
1737                         ERROR("Image count in XML data is not the same as in WIM header.");
1738                         ret = WIMLIB_ERR_IMAGE_COUNT;
1739                         goto out_wimlib_free;
1740                 }
1741         }
1742
1743         /* Get image index (this may use the XML data that was just read to
1744          * resolve an image name).  */
1745         if (image_num_or_name) {
1746                 image = wimlib_resolve_image(pwm, image_num_or_name);
1747                 if (image == WIMLIB_NO_IMAGE) {
1748                         ERROR("\"%"TS"\" is not a valid image in the pipable WIM!",
1749                               image_num_or_name);
1750                         ret = WIMLIB_ERR_INVALID_IMAGE;
1751                         goto out_wimlib_free;
1752                 } else if (image == WIMLIB_ALL_IMAGES) {
1753                         ERROR("Applying all images from a pipe is not supported!");
1754                         ret = WIMLIB_ERR_INVALID_IMAGE;
1755                         goto out_wimlib_free;
1756                 }
1757         } else {
1758                 if (pwm->hdr.image_count != 1) {
1759                         ERROR("No image was specified, but the pipable WIM "
1760                               "did not contain exactly 1 image");
1761                         ret = WIMLIB_ERR_INVALID_IMAGE;
1762                         goto out_wimlib_free;
1763                 }
1764                 image = 1;
1765         }
1766
1767         /* Load the needed metadata resource.  */
1768         for (i = 1; i <= pwm->hdr.image_count; i++) {
1769                 struct wim_lookup_table_entry *metadata_lte;
1770                 struct wim_image_metadata *imd;
1771                 struct wim_resource_spec *metadata_rspec;
1772
1773                 metadata_lte = new_lookup_table_entry();
1774                 if (metadata_lte == NULL) {
1775                         ret = WIMLIB_ERR_NOMEM;
1776                         goto out_wimlib_free;
1777                 }
1778                 metadata_rspec = MALLOC(sizeof(struct wim_resource_spec));
1779                 if (metadata_rspec == NULL) {
1780                         ret = WIMLIB_ERR_NOMEM;
1781                         free_lookup_table_entry(metadata_lte);
1782                         goto out_wimlib_free;
1783                 }
1784
1785                 ret = read_pwm_stream_header(pwm, metadata_lte, metadata_rspec, 0, NULL);
1786                 imd = pwm->image_metadata[i - 1];
1787                 imd->metadata_lte = metadata_lte;
1788                 if (ret) {
1789                         FREE(metadata_rspec);
1790                         goto out_wimlib_free;
1791                 }
1792
1793                 if (!(metadata_lte->flags & WIM_RESHDR_FLAG_METADATA)) {
1794                         ERROR("Expected metadata resource, but found "
1795                               "non-metadata stream.");
1796                         ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
1797                         goto out_wimlib_free;
1798                 }
1799
1800                 if (i == image) {
1801                         /* Metadata resource is for the image being extracted.
1802                          * Parse it and save the metadata in memory.  */
1803                         ret = read_metadata_resource(pwm, imd);
1804                         if (ret)
1805                                 goto out_wimlib_free;
1806                         imd->modified = 1;
1807                 } else {
1808                         /* Metadata resource is not for the image being
1809                          * extracted.  Skip over it.  */
1810                         ret = skip_wim_stream(metadata_lte);
1811                         if (ret)
1812                                 goto out_wimlib_free;
1813                 }
1814         }
1815         /* Extract the image.  */
1816         extract_flags |= WIMLIB_EXTRACT_FLAG_FROM_PIPE;
1817         ret = do_wimlib_extract_image(pwm, image, target,
1818                                       extract_flags, progress_func);
1819         /* Clean up and return.  */
1820 out_wimlib_free:
1821         wimlib_free(pwm);
1822         return ret;
1823 }
1824
1825 WIMLIBAPI int
1826 wimlib_extract_image(WIMStruct *wim, int image, const tchar *target,
1827                      int extract_flags, wimlib_progress_func_t progress_func)
1828 {
1829         if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC)
1830                 return WIMLIB_ERR_INVALID_PARAM;
1831         return do_wimlib_extract_image(wim, image, target, extract_flags,
1832                                        progress_func);
1833 }