]> wimlib.net Git - wimlib/blob - src/extract.c
extract.c: Remove unused internal flag
[wimlib] / src / extract.c
1 /*
2  * extract.c
3  *
4  * Support for extracting WIM images, or files or directories contained in a WIM
5  * image.
6  */
7
8 /*
9  * Copyright (C) 2012, 2013, 2014 Eric Biggers
10  *
11  * This file is part of wimlib, a library for working with WIM files.
12  *
13  * wimlib is free software; you can redistribute it and/or modify it under the
14  * terms of the GNU General Public License as published by the Free
15  * Software Foundation; either version 3 of the License, or (at your option)
16  * any later version.
17  *
18  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20  * A PARTICULAR PURPOSE. See the GNU General Public License for more
21  * details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with wimlib; if not, see http://www.gnu.org/licenses/.
25  */
26
27 /*
28  * This file provides the API functions wimlib_extract_image(),
29  * wimlib_extract_image_from_pipe(), wimlib_extract_paths(), and
30  * wimlib_extract_pathlist().  Internally, all end up calling
31  * do_wimlib_extract_paths() and extract_trees().
32  *
33  * Although wimlib supports multiple extraction modes/backends (NTFS-3g, UNIX,
34  * Win32), this file does not itself have code to extract files or directories
35  * to any specific target; instead, it handles generic functionality and relies
36  * on lower-level callback functions declared in `struct apply_operations' to do
37  * the actual extraction.
38  */
39
40 #ifdef HAVE_CONFIG_H
41 #  include "config.h"
42 #endif
43
44 #include "wimlib/apply.h"
45 #include "wimlib/dentry.h"
46 #include "wimlib/encoding.h"
47 #include "wimlib/endianness.h"
48 #include "wimlib/error.h"
49 #include "wimlib/lookup_table.h"
50 #include "wimlib/metadata.h"
51 #include "wimlib/pathlist.h"
52 #include "wimlib/paths.h"
53 #include "wimlib/reparse.h"
54 #include "wimlib/resource.h"
55 #include "wimlib/security.h"
56 #include "wimlib/unix_data.h"
57 #ifdef __WIN32__
58 #  include "wimlib/win32.h" /* for realpath() equivalent */
59 #endif
60 #include "wimlib/xml.h"
61 #include "wimlib/wildcard.h"
62 #include "wimlib/wim.h"
63
64 #include <errno.h>
65 #include <fcntl.h>
66 #include <stdlib.h>
67 #include <sys/stat.h>
68 #include <unistd.h>
69
/* Extraction flags defined privately in this file.  They use the two highest
 * bits of a 32-bit flags word and are not included in
 * WIMLIB_EXTRACT_MASK_PUBLIC below.  */
#define WIMLIB_EXTRACT_FLAG_FROM_PIPE   0x80000000
#define WIMLIB_EXTRACT_FLAG_IMAGEMODE   0x40000000

/* Bitwise OR of the WIMLIB_EXTRACT_FLAG_* values listed here.
 * Keep in sync with wimlib.h  */
#define WIMLIB_EXTRACT_MASK_PUBLIC                      \
        (  WIMLIB_EXTRACT_FLAG_NTFS                     \
         | WIMLIB_EXTRACT_FLAG_UNIX_DATA                \
         | WIMLIB_EXTRACT_FLAG_NO_ACLS                  \
         | WIMLIB_EXTRACT_FLAG_STRICT_ACLS              \
         | WIMLIB_EXTRACT_FLAG_RPFIX                    \
         | WIMLIB_EXTRACT_FLAG_NORPFIX                  \
         | WIMLIB_EXTRACT_FLAG_TO_STDOUT                \
         | WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES \
         | WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS       \
         | WIMLIB_EXTRACT_FLAG_STRICT_TIMESTAMPS        \
         | WIMLIB_EXTRACT_FLAG_STRICT_SHORT_NAMES       \
         | WIMLIB_EXTRACT_FLAG_STRICT_SYMLINKS          \
         | WIMLIB_EXTRACT_FLAG_GLOB_PATHS               \
         | WIMLIB_EXTRACT_FLAG_STRICT_GLOB              \
         | WIMLIB_EXTRACT_FLAG_NO_ATTRIBUTES            \
         | WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE \
         | WIMLIB_EXTRACT_FLAG_WIMBOOT)
92
93 /* Check whether the extraction of a dentry should be skipped completely.  */
94 static bool
95 dentry_is_supported(struct wim_dentry *dentry,
96                     const struct wim_features *supported_features)
97 {
98         struct wim_inode *inode = dentry->d_inode;
99
100         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
101                 return supported_features->reparse_points ||
102                         (inode_is_symlink(inode) &&
103                          supported_features->symlink_reparse_points);
104         }
105         if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) {
106                 if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)
107                         return supported_features->encrypted_directories != 0;
108                 else
109                         return supported_features->encrypted_files != 0;
110         }
111         return true;
112 }
113
114
115 #define PWM_ALLOW_WIM_HDR 0x00001
116
117 /* Read the header from a stream in a pipable WIM.  */
118 static int
119 read_pwm_stream_header(WIMStruct *pwm, struct wim_lookup_table_entry *lte,
120                        struct wim_resource_spec *rspec,
121                        int flags, struct wim_header_disk *hdr_ret)
122 {
123         union {
124                 struct pwm_stream_hdr stream_hdr;
125                 struct wim_header_disk pwm_hdr;
126         } buf;
127         struct wim_reshdr reshdr;
128         int ret;
129
130         ret = full_read(&pwm->in_fd, &buf.stream_hdr, sizeof(buf.stream_hdr));
131         if (ret)
132                 goto read_error;
133
134         if ((flags & PWM_ALLOW_WIM_HDR) && buf.stream_hdr.magic == PWM_MAGIC) {
135                 BUILD_BUG_ON(sizeof(buf.pwm_hdr) < sizeof(buf.stream_hdr));
136                 ret = full_read(&pwm->in_fd, &buf.stream_hdr + 1,
137                                 sizeof(buf.pwm_hdr) - sizeof(buf.stream_hdr));
138
139                 if (ret)
140                         goto read_error;
141                 lte->resource_location = RESOURCE_NONEXISTENT;
142                 memcpy(hdr_ret, &buf.pwm_hdr, sizeof(buf.pwm_hdr));
143                 return 0;
144         }
145
146         if (le64_to_cpu(buf.stream_hdr.magic) != PWM_STREAM_MAGIC) {
147                 ERROR("Data read on pipe is invalid (expected stream header).");
148                 return WIMLIB_ERR_INVALID_PIPABLE_WIM;
149         }
150
151         copy_hash(lte->hash, buf.stream_hdr.hash);
152
153         reshdr.size_in_wim = 0;
154         reshdr.flags = le32_to_cpu(buf.stream_hdr.flags);
155         reshdr.offset_in_wim = pwm->in_fd.offset;
156         reshdr.uncompressed_size = le64_to_cpu(buf.stream_hdr.uncompressed_size);
157         wim_res_hdr_to_spec(&reshdr, pwm, rspec);
158         lte_bind_wim_resource_spec(lte, rspec);
159         lte->flags = rspec->flags;
160         lte->size = rspec->uncompressed_size;
161         lte->offset_in_res = 0;
162         return 0;
163
164 read_error:
165         ERROR_WITH_ERRNO("Error reading pipable WIM from pipe");
166         return ret;
167 }
168
169 static int
170 load_streams_from_pipe(struct apply_ctx *ctx,
171                        const struct read_stream_list_callbacks *cbs)
172 {
173         struct wim_lookup_table_entry *found_lte = NULL;
174         struct wim_resource_spec *rspec = NULL;
175         struct wim_lookup_table *lookup_table;
176         int ret;
177
178         ret = WIMLIB_ERR_NOMEM;
179         found_lte = new_lookup_table_entry();
180         if (!found_lte)
181                 goto out;
182
183         rspec = MALLOC(sizeof(struct wim_resource_spec));
184         if (!rspec)
185                 goto out;
186
187         lookup_table = ctx->wim->lookup_table;
188         memcpy(ctx->progress.extract.guid, ctx->wim->hdr.guid, WIM_GUID_LEN);
189         ctx->progress.extract.part_number = ctx->wim->hdr.part_number;
190         ctx->progress.extract.total_parts = ctx->wim->hdr.total_parts;
191         ret = extract_progress(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_SPWM_PART_BEGIN);
192         if (ret)
193                 goto out;
194
195         while (ctx->num_streams_remaining) {
196                 struct wim_header_disk pwm_hdr;
197                 struct wim_lookup_table_entry *needed_lte;
198
199                 if (found_lte->resource_location != RESOURCE_NONEXISTENT)
200                         lte_unbind_wim_resource_spec(found_lte);
201                 ret = read_pwm_stream_header(ctx->wim, found_lte, rspec,
202                                              PWM_ALLOW_WIM_HDR, &pwm_hdr);
203                 if (ret)
204                         goto out;
205
206                 if ((found_lte->resource_location != RESOURCE_NONEXISTENT)
207                     && !(found_lte->flags & WIM_RESHDR_FLAG_METADATA)
208                     && (needed_lte = lookup_stream(lookup_table, found_lte->hash))
209                     && (needed_lte->out_refcnt))
210                 {
211                         needed_lte->offset_in_res = found_lte->offset_in_res;
212                         needed_lte->flags = found_lte->flags;
213                         needed_lte->size = found_lte->size;
214
215                         lte_unbind_wim_resource_spec(found_lte);
216                         lte_bind_wim_resource_spec(needed_lte, rspec);
217
218                         ret = (*cbs->begin_stream)(needed_lte, 0,
219                                                    cbs->begin_stream_ctx);
220                         if (ret) {
221                                 lte_unbind_wim_resource_spec(needed_lte);
222                                 goto out;
223                         }
224
225                         ret = extract_stream(needed_lte, needed_lte->size,
226                                              cbs->consume_chunk,
227                                              cbs->consume_chunk_ctx);
228
229                         ret = (*cbs->end_stream)(needed_lte, ret,
230                                                  cbs->end_stream_ctx);
231                         lte_unbind_wim_resource_spec(needed_lte);
232                         if (ret)
233                                 goto out;
234                         ctx->num_streams_remaining--;
235                 } else if (found_lte->resource_location != RESOURCE_NONEXISTENT) {
236                         ret = skip_wim_stream(found_lte);
237                         if (ret)
238                                 goto out;
239                 } else {
240                         u16 part_number = le16_to_cpu(pwm_hdr.part_number);
241                         u16 total_parts = le16_to_cpu(pwm_hdr.total_parts);
242
243                         if (part_number != ctx->progress.extract.part_number ||
244                             total_parts != ctx->progress.extract.total_parts ||
245                             memcmp(pwm_hdr.guid, ctx->progress.extract.guid,
246                                    WIM_GUID_LEN))
247                         {
248                                 ctx->progress.extract.part_number = part_number;
249                                 ctx->progress.extract.total_parts = total_parts;
250                                 memcpy(ctx->progress.extract.guid,
251                                        pwm_hdr.guid, WIM_GUID_LEN);
252                                 ret = extract_progress(ctx,
253                                                        WIMLIB_PROGRESS_MSG_EXTRACT_SPWM_PART_BEGIN);
254                                 if (ret)
255                                         goto out;
256                         }
257                 }
258         }
259         ret = 0;
260 out:
261         if (found_lte->resource_location != RESOURCE_IN_WIM)
262                 FREE(rspec);
263         free_lookup_table_entry(found_lte);
264         return ret;
265 }
266
267 static int
268 begin_extract_stream_with_progress(struct wim_lookup_table_entry *lte,
269                                    u32 flags, void *_ctx)
270 {
271         struct apply_ctx *ctx = _ctx;
272
273         ctx->cur_stream = lte;
274
275         return (*ctx->saved_cbs->begin_stream)(lte, flags,
276                                                ctx->saved_cbs->begin_stream_ctx);
277 }
278
279 static int
280 consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx)
281 {
282         struct apply_ctx *ctx = _ctx;
283         union wimlib_progress_info *progress = &ctx->progress;
284         int ret;
285
286         if (likely(ctx->supported_features.hard_links)) {
287                 progress->extract.completed_bytes +=
288                         (u64)size * ctx->cur_stream->out_refcnt;
289         } else {
290                 const struct stream_owner *owners = stream_owners(ctx->cur_stream);
291                 for (u32 i = 0; i < ctx->cur_stream->out_refcnt; i++) {
292                         const struct wim_inode *inode = owners[i].inode;
293                         const struct wim_dentry *dentry;
294
295                         list_for_each_entry(dentry,
296                                             &inode->i_extraction_aliases,
297                                             d_extraction_alias_node)
298                         {
299                                 progress->extract.completed_bytes += size;
300                         }
301                 }
302         }
303         if (progress->extract.completed_bytes >= ctx->next_progress) {
304
305                 ret = extract_progress(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS);
306                 if (ret)
307                         return ret;
308
309                 if (progress->extract.completed_bytes >=
310                     progress->extract.total_bytes)
311                 {
312                         ctx->next_progress = UINT64_MAX;
313                 } else {
314                         ctx->next_progress += progress->extract.total_bytes / 128;
315                         if (ctx->next_progress > progress->extract.total_bytes)
316                                 ctx->next_progress = progress->extract.total_bytes;
317                 }
318         }
319         return (*ctx->saved_cbs->consume_chunk)(chunk, size,
320                                                 ctx->saved_cbs->consume_chunk_ctx);
321 }
322
323 /*
324  * Read the list of single-instance streams to extract and feed their data into
325  * the specified callback functions.
326  *
327  * This handles checksumming each stream.
328  *
329  * This also handles sending WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS.
330  *
331  * This also works if the WIM is being read from a pipe, whereas attempting to
332  * read streams directly (e.g. with read_full_stream_into_buf()) will not.
333  */
334 int
335 extract_stream_list(struct apply_ctx *ctx,
336                     const struct read_stream_list_callbacks *cbs)
337 {
338         struct read_stream_list_callbacks wrapper_cbs = {
339                 .begin_stream      = begin_extract_stream_with_progress,
340                 .begin_stream_ctx  = ctx,
341                 .consume_chunk     = consume_chunk_with_progress,
342                 .consume_chunk_ctx = ctx,
343                 .end_stream        = cbs->end_stream,
344                 .end_stream_ctx    = cbs->end_stream_ctx,
345         };
346         if (ctx->progfunc) {
347                 ctx->saved_cbs = cbs;
348                 cbs = &wrapper_cbs;
349         }
350         if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
351                 return load_streams_from_pipe(ctx, cbs);
352         } else {
353                 return read_stream_list(&ctx->stream_list,
354                                         offsetof(struct wim_lookup_table_entry,
355                                                  extraction_list),
356                                         cbs, VERIFY_STREAM_HASHES);
357         }
358 }
359
360 /* Extract a WIM dentry to standard output.
361  *
362  * This obviously doesn't make sense in all cases.  We return an error if the
363  * dentry does not correspond to a regular file.  Otherwise we extract the
364  * unnamed data stream only.  */
365 static int
366 extract_dentry_to_stdout(struct wim_dentry *dentry,
367                          const struct wim_lookup_table *lookup_table)
368 {
369         struct wim_inode *inode = dentry->d_inode;
370         struct wim_lookup_table_entry *lte;
371         struct filedes _stdout;
372
373         if (inode->i_attributes & (FILE_ATTRIBUTE_REPARSE_POINT |
374                                    FILE_ATTRIBUTE_DIRECTORY))
375         {
376                 ERROR("\"%"TS"\" is not a regular file and therefore cannot be "
377                       "extracted to standard output", dentry_full_path(dentry));
378                 return WIMLIB_ERR_NOT_A_REGULAR_FILE;
379         }
380
381         lte = inode_unnamed_lte(inode, lookup_table);
382         if (!lte) {
383                 const u8 *hash = inode_unnamed_stream_hash(inode);
384                 if (!is_zero_hash(hash))
385                         return stream_not_found_error(inode, hash);
386                 return 0;
387         }
388
389         filedes_init(&_stdout, STDOUT_FILENO);
390         return extract_full_stream_to_fd(lte, &_stdout);
391 }
392
/* Extract each of the @num_dentries dentries to standard output, in array
 * order, stopping at the first failure.  Returns 0 or the first nonzero result
 * of extract_dentry_to_stdout().  */
static int
extract_dentries_to_stdout(struct wim_dentry **dentries, size_t num_dentries,
                           const struct wim_lookup_table *lookup_table)
{
        int ret = 0;

        for (size_t i = 0; i < num_dentries && ret == 0; i++)
                ret = extract_dentry_to_stdout(dentries[i], lookup_table);

        return ret;
}
404
405 /**********************************************************************/
406
407 /*
408  * Removes duplicate dentries from the array.
409  *
410  * Returns the new number of dentries, packed at the front of the array.
411  */
412 static size_t
413 remove_duplicate_trees(struct wim_dentry **trees, size_t num_trees)
414 {
415         size_t i, j = 0;
416         for (i = 0; i < num_trees; i++) {
417                 if (!trees[i]->tmp_flag) {
418                         /* Found distinct dentry.  */
419                         trees[i]->tmp_flag = 1;
420                         trees[j++] = trees[i];
421                 }
422         }
423         for (i = 0; i < j; i++)
424                 trees[i]->tmp_flag = 0;
425         return j;
426 }
427
428 /*
429  * Remove dentries that are descendants of other dentries in the array.
430  *
431  * Returns the new number of dentries, packed at the front of the array.
432  */
433 static size_t
434 remove_contained_trees(struct wim_dentry **trees, size_t num_trees)
435 {
436         size_t i, j = 0;
437         for (i = 0; i < num_trees; i++)
438                 trees[i]->tmp_flag = 1;
439         for (i = 0; i < num_trees; i++) {
440                 struct wim_dentry *d = trees[i];
441                 while (!dentry_is_root(d)) {
442                         d = d->d_parent;
443                         if (d->tmp_flag)
444                                 goto tree_contained;
445                 }
446                 trees[j++] = trees[i];
447                 continue;
448
449         tree_contained:
450                 trees[i]->tmp_flag = 0;
451         }
452
453         for (i = 0; i < j; i++)
454                 trees[i]->tmp_flag = 0;
455         return j;
456 }
457
458 static int
459 dentry_append_to_list(struct wim_dentry *dentry, void *_dentry_list)
460 {
461         struct list_head *dentry_list = _dentry_list;
462         list_add_tail(&dentry->d_extraction_list_node, dentry_list);
463         return 0;
464 }
465
466 static void
467 dentry_reset_extraction_list_node(struct wim_dentry *dentry)
468 {
469         dentry->d_extraction_list_node = (struct list_head){NULL, NULL};
470 }
471
472 static int
473 dentry_delete_from_list(struct wim_dentry *dentry, void *_ignore)
474 {
475         list_del(&dentry->d_extraction_list_node);
476         dentry_reset_extraction_list_node(dentry);
477         return 0;
478 }
479
480 /*
481  * Build the preliminary list of dentries to be extracted.
482  *
483  * The list maintains the invariant that if d1 and d2 are in the list and d1 is
484  * an ancestor of d2, then d1 appears before d2 in the list.
485  */
486 static void
487 build_dentry_list(struct list_head *dentry_list, struct wim_dentry **trees,
488                   size_t num_trees, bool add_ancestors)
489 {
490         INIT_LIST_HEAD(dentry_list);
491
492         /* Add the trees recursively.  */
493         for (size_t i = 0; i < num_trees; i++)
494                 for_dentry_in_tree(trees[i], dentry_append_to_list, dentry_list);
495
496         /* If requested, add ancestors of the trees.  */
497         if (add_ancestors) {
498                 for (size_t i = 0; i < num_trees; i++) {
499                         struct wim_dentry *dentry = trees[i];
500                         struct wim_dentry *ancestor;
501                         struct list_head *place_after;
502
503                         if (dentry_is_root(dentry))
504                                 continue;
505
506                         place_after = dentry_list;
507                         ancestor = dentry;
508                         do {
509                                 ancestor = ancestor->d_parent;
510                                 if (will_extract_dentry(ancestor)) {
511                                         place_after = &ancestor->d_extraction_list_node;
512                                         break;
513                                 }
514                         } while (!dentry_is_root(ancestor));
515
516                         ancestor = dentry;
517                         do {
518                                 ancestor = ancestor->d_parent;
519                                 if (will_extract_dentry(ancestor))
520                                         break;
521                                 list_add(&ancestor->d_extraction_list_node, place_after);
522                         } while (!dentry_is_root(ancestor));
523                 }
524         }
525 }
526
527 static void
528 destroy_dentry_list(struct list_head *dentry_list)
529 {
530         struct wim_dentry *dentry, *tmp;
531         struct wim_inode *inode;
532
533         list_for_each_entry_safe(dentry, tmp, dentry_list, d_extraction_list_node) {
534                 inode = dentry->d_inode;
535                 dentry_reset_extraction_list_node(dentry);
536                 inode->i_visited = 0;
537                 if ((void *)dentry->d_extraction_name != (void *)dentry->file_name)
538                         FREE(dentry->d_extraction_name);
539                 dentry->d_extraction_name = NULL;
540                 dentry->d_extraction_name_nchars = 0;
541         }
542 }
543
544 static void
545 destroy_stream_list(struct list_head *stream_list)
546 {
547         struct wim_lookup_table_entry *lte;
548
549         list_for_each_entry(lte, stream_list, extraction_list)
550                 if (lte->out_refcnt > ARRAY_LEN(lte->inline_stream_owners))
551                         FREE(lte->stream_owners);
552 }
553
554 #ifdef __WIN32__
555 static const utf16lechar replacement_char = cpu_to_le16(0xfffd);
556 #else
557 static const utf16lechar replacement_char = cpu_to_le16('?');
558 #endif
559
560 static bool
561 file_name_valid(utf16lechar *name, size_t num_chars, bool fix)
562 {
563         size_t i;
564
565         if (num_chars == 0)
566                 return true;
567         for (i = 0; i < num_chars; i++) {
568                 switch (name[i]) {
569         #ifdef __WIN32__
570                 case cpu_to_le16('\\'):
571                 case cpu_to_le16(':'):
572                 case cpu_to_le16('*'):
573                 case cpu_to_le16('?'):
574                 case cpu_to_le16('"'):
575                 case cpu_to_le16('<'):
576                 case cpu_to_le16('>'):
577                 case cpu_to_le16('|'):
578         #endif
579                 case cpu_to_le16('/'):
580                 case cpu_to_le16('\0'):
581                         if (fix)
582                                 name[i] = replacement_char;
583                         else
584                                 return false;
585                 }
586         }
587
588 #ifdef __WIN32__
589         if (name[num_chars - 1] == cpu_to_le16(' ') ||
590             name[num_chars - 1] == cpu_to_le16('.'))
591         {
592                 if (fix)
593                         name[num_chars - 1] = replacement_char;
594                 else
595                         return false;
596         }
597 #endif
598         return true;
599 }
600
601 static int
602 dentry_calculate_extraction_name(struct wim_dentry *dentry,
603                                  struct apply_ctx *ctx)
604 {
605         int ret;
606
607         if (!dentry_is_supported(dentry, &ctx->supported_features))
608                 goto skip_dentry;
609
610         if (dentry_is_root(dentry))
611                 return 0;
612
613         if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
614                 dentry->d_extraction_name = dentry->file_name;
615                 dentry->d_extraction_name_nchars = dentry->file_name_nbytes /
616                                                    sizeof(utf16lechar);
617                 return 0;
618         }
619
620         if (!ctx->supported_features.case_sensitive_filenames) {
621                 struct wim_dentry *other;
622                 list_for_each_entry(other, &dentry->d_ci_conflict_list,
623                                     d_ci_conflict_list)
624                 {
625                         if (will_extract_dentry(other)) {
626                                 if (ctx->extract_flags &
627                                     WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS) {
628                                         WARNING("\"%"TS"\" has the same "
629                                                 "case-insensitive name as "
630                                                 "\"%"TS"\"; extracting "
631                                                 "dummy name instead",
632                                                 dentry_full_path(dentry),
633                                                 dentry_full_path(other));
634                                         goto out_replace;
635                                 } else {
636                                         WARNING("Not extracting \"%"TS"\": "
637                                                 "has same case-insensitive "
638                                                 "name as \"%"TS"\"",
639                                                 dentry_full_path(dentry),
640                                                 dentry_full_path(other));
641                                         goto skip_dentry;
642                                 }
643                         }
644                 }
645         }
646
647         if (file_name_valid(dentry->file_name, dentry->file_name_nbytes / 2, false)) {
648                 ret = utf16le_get_tstr(dentry->file_name,
649                                        dentry->file_name_nbytes,
650                                        (const tchar **)&dentry->d_extraction_name,
651                                        &dentry->d_extraction_name_nchars);
652                 dentry->d_extraction_name_nchars /= sizeof(tchar);
653                 return ret;
654         } else {
655                 if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES)
656                 {
657                         WARNING("\"%"TS"\" has an invalid filename "
658                                 "that is not supported on this platform; "
659                                 "extracting dummy name instead",
660                                 dentry_full_path(dentry));
661                         goto out_replace;
662                 } else {
663                         WARNING("Not extracting \"%"TS"\": has an invalid filename "
664                                 "that is not supported on this platform",
665                                 dentry_full_path(dentry));
666                         goto skip_dentry;
667                 }
668         }
669
670 out_replace:
671         {
672                 utf16lechar utf16_name_copy[dentry->file_name_nbytes / 2];
673
674                 memcpy(utf16_name_copy, dentry->file_name, dentry->file_name_nbytes);
675                 file_name_valid(utf16_name_copy, dentry->file_name_nbytes / 2, true);
676
677                 const tchar *tchar_name;
678                 size_t tchar_nchars;
679
680                 ret = utf16le_get_tstr(utf16_name_copy,
681                                        dentry->file_name_nbytes,
682                                        &tchar_name, &tchar_nchars);
683                 if (ret)
684                         return ret;
685
686                 tchar_nchars /= sizeof(tchar);
687
688                 size_t fixed_name_num_chars = tchar_nchars;
689                 tchar fixed_name[tchar_nchars + 50];
690
691                 tmemcpy(fixed_name, tchar_name, tchar_nchars);
692                 fixed_name_num_chars += tsprintf(fixed_name + tchar_nchars,
693                                                  T(" (invalid filename #%lu)"),
694                                                  ++ctx->invalid_sequence);
695
696                 utf16le_put_tstr(tchar_name);
697
698                 dentry->d_extraction_name = memdup(fixed_name,
699                                                    2 * fixed_name_num_chars + 2);
700                 if (!dentry->d_extraction_name)
701                         return WIMLIB_ERR_NOMEM;
702                 dentry->d_extraction_name_nchars = fixed_name_num_chars;
703         }
704         return 0;
705
706 skip_dentry:
707         for_dentry_in_tree(dentry, dentry_delete_from_list, NULL);
708         return 0;
709 }
710
711 /*
712  * Calculate the actual filename component at which each WIM dentry will be
713  * extracted, with special handling for dentries that are unsupported by the
714  * extraction backend or have invalid names.
715  *
716  * ctx->supported_features must be filled in.
717  *
718  * Possible error codes: WIMLIB_ERR_NOMEM, WIMLIB_ERR_INVALID_UTF16_STRING
719  */
720 static int
721 dentry_list_calculate_extraction_names(struct list_head *dentry_list,
722                                        struct apply_ctx *ctx)
723 {
724         struct list_head *prev, *cur;
725
726         /* Can't use list_for_each_entry() because a call to
727          * dentry_calculate_extraction_name() may delete the current dentry and
728          * its children from the list.  */
729
730         prev = dentry_list;
731         for (;;) {
732                 struct wim_dentry *dentry;
733                 int ret;
734
735                 cur = prev->next;
736                 if (cur == dentry_list)
737                         break;
738
739                 dentry = list_entry(cur, struct wim_dentry, d_extraction_list_node);
740
741                 ret = dentry_calculate_extraction_name(dentry, ctx);
742                 if (ret)
743                         return ret;
744
745                 if (prev->next == cur)
746                         prev = cur;
747                 else
748                         ; /* Current dentry and its children (which follow in
749                              the list) were deleted.  prev stays the same.  */
750         }
751         return 0;
752 }
753
754 static int
755 dentry_resolve_streams(struct wim_dentry *dentry, int extract_flags,
756                        struct wim_lookup_table *lookup_table)
757 {
758         struct wim_inode *inode = dentry->d_inode;
759         struct wim_lookup_table_entry *lte;
760         int ret;
761         bool force = false;
762
763         /* Special case:  when extracting from a pipe, the WIM lookup table is
764          * initially empty, so "resolving" an inode's streams is initially not
765          * possible.  However, we still need to keep track of which streams,
766          * identified by SHA1 message digests, need to be extracted, so we
767          * "resolve" the inode's streams anyway by allocating new entries.  */
768         if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE)
769                 force = true;
770         ret = inode_resolve_streams(inode, lookup_table, force);
771         if (ret)
772                 return ret;
773         for (u32 i = 0; i <= inode->i_num_ads; i++) {
774                 lte = inode_stream_lte_resolved(inode, i);
775                 if (lte)
776                         lte->out_refcnt = 0;
777         }
778         return 0;
779 }
780
781 /*
782  * For each dentry to be extracted, resolve all streams in the corresponding
783  * inode and set 'out_refcnt' in each to 0.
784  *
785  * Possible error codes: WIMLIB_ERR_RESOURCE_NOT_FOUND, WIMLIB_ERR_NOMEM.
786  */
787 static int
788 dentry_list_resolve_streams(struct list_head *dentry_list,
789                             struct apply_ctx *ctx)
790 {
791         struct wim_dentry *dentry;
792         int ret;
793
794         list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
795                 ret = dentry_resolve_streams(dentry,
796                                              ctx->extract_flags,
797                                              ctx->wim->lookup_table);
798                 if (ret)
799                         return ret;
800         }
801         return 0;
802 }
803
804 static int
805 ref_stream(struct wim_lookup_table_entry *lte, u32 stream_idx,
806            struct wim_dentry *dentry, struct apply_ctx *ctx)
807 {
808         struct wim_inode *inode = dentry->d_inode;
809         struct stream_owner *stream_owners;
810
811         if (!lte)
812                 return 0;
813
814         /* Tally the size only for each extraction of the stream (not hard
815          * links).  */
816         if (inode->i_visited && ctx->supported_features.hard_links)
817                 return 0;
818
819         ctx->progress.extract.total_bytes += lte->size;
820         ctx->progress.extract.num_streams++;
821
822         if (inode->i_visited)
823                 return 0;
824
825         /* Add stream to the dentry_list only one time, even if it's going
826          * to be extracted to multiple inodes.  */
827         if (lte->out_refcnt == 0) {
828                 list_add_tail(&lte->extraction_list, &ctx->stream_list);
829                 ctx->num_streams_remaining++;
830         }
831
832         /* If inode not yet been visited, append it to the stream_owners array.  */
833         if (lte->out_refcnt < ARRAY_LEN(lte->inline_stream_owners)) {
834                 stream_owners = lte->inline_stream_owners;
835         } else {
836                 struct stream_owner *prev_stream_owners;
837                 size_t alloc_stream_owners;
838
839                 if (lte->out_refcnt == ARRAY_LEN(lte->inline_stream_owners)) {
840                         prev_stream_owners = NULL;
841                         alloc_stream_owners = ARRAY_LEN(lte->inline_stream_owners);
842                 } else {
843                         prev_stream_owners = lte->stream_owners;
844                         alloc_stream_owners = lte->alloc_stream_owners;
845                 }
846
847                 if (lte->out_refcnt == alloc_stream_owners) {
848                         alloc_stream_owners *= 2;
849                         stream_owners = REALLOC(prev_stream_owners,
850                                                alloc_stream_owners *
851                                                 sizeof(stream_owners[0]));
852                         if (!stream_owners)
853                                 return WIMLIB_ERR_NOMEM;
854                         if (!prev_stream_owners) {
855                                 memcpy(stream_owners,
856                                        lte->inline_stream_owners,
857                                        sizeof(lte->inline_stream_owners));
858                         }
859                         lte->stream_owners = stream_owners;
860                         lte->alloc_stream_owners = alloc_stream_owners;
861                 }
862                 stream_owners = lte->stream_owners;
863         }
864         stream_owners[lte->out_refcnt].inode = inode;
865         if (stream_idx == 0) {
866                 stream_owners[lte->out_refcnt].stream_name = NULL;
867         } else {
868                 stream_owners[lte->out_refcnt].stream_name =
869                         inode->i_ads_entries[stream_idx - 1].stream_name;
870         }
871         lte->out_refcnt++;
872         return 0;
873 }
874
875 static int
876 dentry_ref_streams(struct wim_dentry *dentry, struct apply_ctx *ctx)
877 {
878         struct wim_inode *inode = dentry->d_inode;
879         int ret;
880
881         /* The unnamed data stream will always be extracted, except in an
882          * unlikely case.  */
883         if (!inode_is_encrypted_directory(inode)) {
884                 u16 stream_idx;
885                 struct wim_lookup_table_entry *stream;
886
887                 stream = inode_unnamed_stream_resolved(inode, &stream_idx);
888                 ret = ref_stream(stream, stream_idx, dentry, ctx);
889                 if (ret)
890                         return ret;
891         }
892
893         /* Named data streams will be extracted only if supported in the current
894          * extraction mode and volume, and to avoid complications, if not doing
895          * a linked extraction.  */
896         if (ctx->supported_features.named_data_streams) {
897                 for (u16 i = 0; i < inode->i_num_ads; i++) {
898                         if (!ads_entry_is_named_stream(&inode->i_ads_entries[i]))
899                                 continue;
900                         ret = ref_stream(inode->i_ads_entries[i].lte, i + 1,
901                                          dentry, ctx);
902                         if (ret)
903                                 return ret;
904                 }
905         }
906         inode->i_visited = 1;
907         return 0;
908 }
909
910 /*
911  * For each dentry to be extracted, iterate through the data streams of the
912  * corresponding inode.  For each such stream that is not to be ignored due to
913  * the supported features or extraction flags, add it to the list of streams to
914  * be extracted (ctx->stream_list) if not already done so.
915  *
916  * Also builds a mapping from each stream to the inodes referencing it.
917  *
918  * This also initializes the extract progress info with byte and stream
919  * information.
920  *
921  * ctx->supported_features must be filled in.
922  *
923  * Possible error codes: WIMLIB_ERR_NOMEM.
924  */
925 static int
926 dentry_list_ref_streams(struct list_head *dentry_list, struct apply_ctx *ctx)
927 {
928         struct wim_dentry *dentry;
929         int ret;
930
931         list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
932                 ret = dentry_ref_streams(dentry, ctx);
933                 if (ret)
934                         return ret;
935         }
936         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
937                 dentry->d_inode->i_visited = 0;
938         return 0;
939 }
940
941 static void
942 dentry_list_build_inode_alias_lists(struct list_head *dentry_list)
943 {
944         struct wim_dentry *dentry;
945         struct wim_inode *inode;
946
947         list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
948                 inode = dentry->d_inode;
949                 if (!inode->i_visited)
950                         INIT_LIST_HEAD(&inode->i_extraction_aliases);
951                 list_add_tail(&dentry->d_extraction_alias_node,
952                               &inode->i_extraction_aliases);
953                 inode->i_visited = 1;
954         }
955         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
956                 dentry->d_inode->i_visited = 0;
957 }
958
959 static void
960 inode_tally_features(const struct wim_inode *inode,
961                      struct wim_features *features)
962 {
963         if (inode->i_attributes & FILE_ATTRIBUTE_ARCHIVE)
964                 features->archive_files++;
965         if (inode->i_attributes & FILE_ATTRIBUTE_HIDDEN)
966                 features->hidden_files++;
967         if (inode->i_attributes & FILE_ATTRIBUTE_SYSTEM)
968                 features->system_files++;
969         if (inode->i_attributes & FILE_ATTRIBUTE_COMPRESSED)
970                 features->compressed_files++;
971         if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) {
972                 if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)
973                         features->encrypted_directories++;
974                 else
975                         features->encrypted_files++;
976         }
977         if (inode->i_attributes & FILE_ATTRIBUTE_NOT_CONTENT_INDEXED)
978                 features->not_context_indexed_files++;
979         if (inode->i_attributes & FILE_ATTRIBUTE_SPARSE_FILE)
980                 features->sparse_files++;
981         if (inode_has_named_stream(inode))
982                 features->named_data_streams++;
983         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
984                 features->reparse_points++;
985                 if (inode_is_symlink(inode))
986                         features->symlink_reparse_points++;
987                 else
988                         features->other_reparse_points++;
989         }
990         if (inode->i_security_id != -1)
991                 features->security_descriptors++;
992         if (inode_has_unix_data(inode))
993                 features->unix_data++;
994 }
995
996 /* Tally features necessary to extract a dentry and the corresponding inode.  */
997 static void
998 dentry_tally_features(struct wim_dentry *dentry, struct wim_features *features)
999 {
1000         struct wim_inode *inode = dentry->d_inode;
1001
1002         if (dentry_has_short_name(dentry))
1003                 features->short_names++;
1004
1005         if (inode->i_visited) {
1006                 features->hard_links++;
1007         } else {
1008                 inode_tally_features(inode, features);
1009                 inode->i_visited = 1;
1010         }
1011 }
1012
1013 /* Tally the features necessary to extract the specified dentries.  */
1014 static void
1015 dentry_list_get_features(struct list_head *dentry_list,
1016                          struct wim_features *features)
1017 {
1018         struct wim_dentry *dentry;
1019
1020         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
1021                 dentry_tally_features(dentry, features);
1022
1023         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
1024                 dentry->d_inode->i_visited = 0;
1025 }
1026
1027 static int
1028 do_feature_check(const struct wim_features *required_features,
1029                  const struct wim_features *supported_features,
1030                  int extract_flags)
1031 {
1032         /* File attributes.  */
1033         if (!(extract_flags & WIMLIB_EXTRACT_FLAG_NO_ATTRIBUTES)) {
1034                 /* Note: Don't bother the user about FILE_ATTRIBUTE_ARCHIVE.
1035                  * We're an archive program, so theoretically we can do what we
1036                  * want with it.  */
1037
1038                 if (required_features->hidden_files &&
1039                     !supported_features->hidden_files)
1040                         WARNING("Ignoring FILE_ATTRIBUTE_HIDDEN of %lu files",
1041                                 required_features->hidden_files);
1042
1043                 if (required_features->system_files &&
1044                     !supported_features->system_files)
1045                         WARNING("Ignoring FILE_ATTRIBUTE_SYSTEM of %lu files",
1046                                 required_features->system_files);
1047
1048                 if (required_features->compressed_files &&
1049                     !supported_features->compressed_files)
1050                         WARNING("Ignoring FILE_ATTRIBUTE_COMPRESSED of %lu files",
1051                                 required_features->compressed_files);
1052
1053                 if (required_features->not_context_indexed_files &&
1054                     !supported_features->not_context_indexed_files)
1055                         WARNING("Ignoring FILE_ATTRIBUTE_NOT_CONTENT_INDEXED of %lu files",
1056                                 required_features->not_context_indexed_files);
1057
1058                 if (required_features->sparse_files &&
1059                     !supported_features->sparse_files)
1060                         WARNING("Ignoring FILE_ATTRIBUTE_SPARSE_FILE of %lu files",
1061                                 required_features->sparse_files);
1062
1063                 if (required_features->encrypted_directories &&
1064                     !supported_features->encrypted_directories)
1065                         WARNING("Ignoring FILE_ATTRIBUTE_ENCRYPTED of %lu directories",
1066                                 required_features->encrypted_directories);
1067         }
1068
1069         /* Encrypted files.  */
1070         if (required_features->encrypted_files &&
1071             !supported_features->encrypted_files)
1072                 WARNING("Ignoring %lu encrypted files",
1073                         required_features->encrypted_files);
1074
1075         /* Named data streams.  */
1076         if (required_features->named_data_streams &&
1077             (!supported_features->named_data_streams))
1078                 WARNING("Ignoring named data streams of %lu files",
1079                         required_features->named_data_streams);
1080
1081         /* Hard links.  */
1082         if (required_features->hard_links && !supported_features->hard_links)
1083                 WARNING("Extracting %lu hard links as independent files",
1084                         required_features->hard_links);
1085
1086         /* Symbolic links and reparse points.  */
1087         if ((extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_SYMLINKS) &&
1088             required_features->symlink_reparse_points &&
1089             !supported_features->symlink_reparse_points &&
1090             !supported_features->reparse_points)
1091         {
1092                 ERROR("Extraction backend does not support symbolic links!");
1093                 return WIMLIB_ERR_UNSUPPORTED;
1094         }
1095         if (required_features->reparse_points &&
1096             !supported_features->reparse_points)
1097         {
1098                 if (supported_features->symlink_reparse_points) {
1099                         if (required_features->other_reparse_points) {
1100                                 WARNING("Ignoring %lu non-symlink/junction "
1101                                         "reparse point files",
1102                                         required_features->other_reparse_points);
1103                         }
1104                 } else {
1105                         WARNING("Ignoring %lu reparse point files",
1106                                 required_features->reparse_points);
1107                 }
1108         }
1109
1110         /* Security descriptors.  */
1111         if (((extract_flags & (WIMLIB_EXTRACT_FLAG_STRICT_ACLS |
1112                                WIMLIB_EXTRACT_FLAG_UNIX_DATA))
1113              == WIMLIB_EXTRACT_FLAG_STRICT_ACLS) &&
1114             required_features->security_descriptors &&
1115             !supported_features->security_descriptors)
1116         {
1117                 ERROR("Extraction backend does not support security descriptors!");
1118                 return WIMLIB_ERR_UNSUPPORTED;
1119         }
1120         if (!(extract_flags & WIMLIB_EXTRACT_FLAG_NO_ACLS) &&
1121             required_features->security_descriptors &&
1122             !supported_features->security_descriptors)
1123                 WARNING("Ignoring Windows NT security descriptors of %lu files",
1124                         required_features->security_descriptors);
1125
1126         /* UNIX data.  */
1127         if ((extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) &&
1128             required_features->unix_data && !supported_features->unix_data)
1129         {
1130                 ERROR("Extraction backend does not support UNIX data!");
1131                 return WIMLIB_ERR_UNSUPPORTED;
1132         }
1133
1134         if (required_features->unix_data &&
1135             !(extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA))
1136         {
1137                 WARNING("Ignoring UNIX metadata of %lu files",
1138                         required_features->unix_data);
1139         }
1140
1141         /* DOS Names.  */
1142         if (required_features->short_names &&
1143             !supported_features->short_names)
1144         {
1145                 if (extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_SHORT_NAMES) {
1146                         ERROR("Extraction backend does not support DOS names!");
1147                         return WIMLIB_ERR_UNSUPPORTED;
1148                 }
1149                 WARNING("Ignoring DOS names of %lu files",
1150                         required_features->short_names);
1151         }
1152
1153         /* Timestamps.  */
1154         if ((extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_TIMESTAMPS) &&
1155             !supported_features->timestamps)
1156         {
1157                 ERROR("Extraction backend does not support timestamps!");
1158                 return WIMLIB_ERR_UNSUPPORTED;
1159         }
1160
1161         return 0;
1162 }
1163
1164 static const struct apply_operations *
1165 select_apply_operations(int extract_flags)
1166 {
1167 #ifdef WITH_NTFS_3G
1168         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS)
1169                 return &ntfs_3g_apply_ops;
1170 #endif
1171 #ifdef __WIN32__
1172         return &win32_apply_ops;
1173 #else
1174         return &unix_apply_ops;
1175 #endif
1176 }
1177
1178 static int
1179 extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees,
1180               const tchar *target, int extract_flags)
1181 {
1182         const struct apply_operations *ops;
1183         struct apply_ctx *ctx;
1184         int ret;
1185         LIST_HEAD(dentry_list);
1186
1187         if (extract_flags & WIMLIB_EXTRACT_FLAG_TO_STDOUT) {
1188                 ret = extract_dentries_to_stdout(trees, num_trees,
1189                                                  wim->lookup_table);
1190                 goto out;
1191         }
1192
1193         num_trees = remove_duplicate_trees(trees, num_trees);
1194         num_trees = remove_contained_trees(trees, num_trees);
1195
1196         ops = select_apply_operations(extract_flags);
1197
1198         if (num_trees > 1 && ops->single_tree_only) {
1199                 ERROR("Extracting multiple directory trees "
1200                       "at once is not supported in %s extraction mode!",
1201                       ops->name);
1202                 ret = WIMLIB_ERR_UNSUPPORTED;
1203                 goto out;
1204         }
1205
1206         ctx = CALLOC(1, ops->context_size);
1207         if (!ctx) {
1208                 ret = WIMLIB_ERR_NOMEM;
1209                 goto out;
1210         }
1211
1212         ctx->wim = wim;
1213         ctx->target = target;
1214         ctx->target_nchars = tstrlen(target);
1215         ctx->extract_flags = extract_flags;
1216         if (ctx->wim->progfunc) {
1217                 ctx->progfunc = ctx->wim->progfunc;
1218                 ctx->progctx = ctx->wim->progctx;
1219                 ctx->progress.extract.image = wim->current_image;
1220                 ctx->progress.extract.extract_flags = (extract_flags &
1221                                                        WIMLIB_EXTRACT_MASK_PUBLIC);
1222                 ctx->progress.extract.wimfile_name = wim->filename;
1223                 ctx->progress.extract.image_name = wimlib_get_image_name(wim,
1224                                                                          wim->current_image);
1225                 ctx->progress.extract.target = target;
1226         }
1227         INIT_LIST_HEAD(&ctx->stream_list);
1228
1229         ret = (*ops->get_supported_features)(target, &ctx->supported_features);
1230         if (ret)
1231                 goto out_cleanup;
1232
1233         build_dentry_list(&dentry_list, trees, num_trees,
1234                           !(extract_flags &
1235                             WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE));
1236
1237         dentry_list_get_features(&dentry_list, &ctx->required_features);
1238
1239         ret = do_feature_check(&ctx->required_features, &ctx->supported_features,
1240                                ctx->extract_flags);
1241         if (ret)
1242                 goto out_cleanup;
1243
1244         ret = dentry_list_calculate_extraction_names(&dentry_list, ctx);
1245         if (ret)
1246                 goto out_cleanup;
1247
1248         ret = dentry_list_resolve_streams(&dentry_list, ctx);
1249         if (ret)
1250                 goto out_cleanup;
1251
1252         ret = dentry_list_ref_streams(&dentry_list, ctx);
1253         if (ret)
1254                 goto out_cleanup;
1255
1256         dentry_list_build_inode_alias_lists(&dentry_list);
1257
1258         if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
1259                 /* When extracting from a pipe, the number of bytes of data to
1260                  * extract can't be determined in the normal way (examining the
1261                  * lookup table), since at this point all we have is a set of
1262                  * SHA1 message digests of streams that need to be extracted.
1263                  * However, we can get a reasonably accurate estimate by taking
1264                  * <TOTALBYTES> from the corresponding <IMAGE> in the WIM XML
1265                  * data.  This does assume that a full image is being extracted,
1266                  * but currently there is no API for doing otherwise.  (Also,
1267                  * subtract <HARDLINKBYTES> from this if hard links are
1268                  * supported by the extraction mode.)  */
1269                 ctx->progress.extract.total_bytes =
1270                         wim_info_get_image_total_bytes(wim->wim_info,
1271                                                        wim->current_image);
1272                 if (ctx->supported_features.hard_links) {
1273                         ctx->progress.extract.total_bytes -=
1274                                 wim_info_get_image_hard_link_bytes(wim->wim_info,
1275                                                                    wim->current_image);
1276                 }
1277         }
1278
1279         ret = extract_progress(ctx,
1280                                ((extract_flags & WIMLIB_EXTRACT_FLAG_IMAGEMODE) ?
1281                                        WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN :
1282                                        WIMLIB_PROGRESS_MSG_EXTRACT_TREE_BEGIN));
1283         if (ret)
1284                 goto out_cleanup;
1285
1286         ret = (*ops->extract)(&dentry_list, ctx);
1287         if (ret)
1288                 goto out_cleanup;
1289
1290         if (ctx->progress.extract.completed_bytes <
1291             ctx->progress.extract.total_bytes)
1292         {
1293                 ctx->progress.extract.completed_bytes =
1294                         ctx->progress.extract.total_bytes;
1295                 ret = extract_progress(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS);
1296                 if (ret)
1297                         goto out_cleanup;
1298         }
1299
1300         ret = extract_progress(ctx,
1301                                ((extract_flags & WIMLIB_EXTRACT_FLAG_IMAGEMODE) ?
1302                                        WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END :
1303                                        WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END));
1304 out_cleanup:
1305         destroy_stream_list(&ctx->stream_list);
1306         destroy_dentry_list(&dentry_list);
1307         FREE(ctx);
1308 out:
1309         return ret;
1310 }
1311
1312 static int
1313 mkdir_if_needed(const tchar *target)
1314 {
1315         struct stat stbuf;
1316         if (tstat(target, &stbuf)) {
1317                 if (errno == ENOENT) {
1318                         if (tmkdir(target, 0755)) {
1319                                 ERROR_WITH_ERRNO("Failed to create directory "
1320                                                  "\"%"TS"\"", target);
1321                                 return WIMLIB_ERR_MKDIR;
1322                         }
1323                 } else {
1324                         ERROR_WITH_ERRNO("Failed to stat \"%"TS"\"", target);
1325                         return WIMLIB_ERR_STAT;
1326                 }
1327         } else if (!S_ISDIR(stbuf.st_mode)) {
1328                 ERROR("\"%"TS"\" is not a directory", target);
1329                 return WIMLIB_ERR_NOTDIR;
1330         }
1331         return 0;
1332 }
1333
1334 /* Make sure the extraction flags make sense, and update them if needed.  */
1335 static int
1336 check_extract_flags(const WIMStruct *wim, int *extract_flags_p)
1337 {
1338         int extract_flags = *extract_flags_p;
1339
1340         /* Check for invalid flag combinations  */
1341
1342         if ((extract_flags &
1343              (WIMLIB_EXTRACT_FLAG_NO_ACLS |
1344               WIMLIB_EXTRACT_FLAG_STRICT_ACLS)) == (WIMLIB_EXTRACT_FLAG_NO_ACLS |
1345                                                     WIMLIB_EXTRACT_FLAG_STRICT_ACLS))
1346                 return WIMLIB_ERR_INVALID_PARAM;
1347
1348         if ((extract_flags &
1349              (WIMLIB_EXTRACT_FLAG_RPFIX |
1350               WIMLIB_EXTRACT_FLAG_NORPFIX)) == (WIMLIB_EXTRACT_FLAG_RPFIX |
1351                                                 WIMLIB_EXTRACT_FLAG_NORPFIX))
1352                 return WIMLIB_ERR_INVALID_PARAM;
1353
1354 #ifndef WITH_NTFS_3G
1355         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
1356                 ERROR("wimlib was compiled without support for NTFS-3g, so\n"
1357                       "        it cannot apply a WIM image directly to an NTFS volume.");
1358                 return WIMLIB_ERR_UNSUPPORTED;
1359         }
1360 #endif
1361
1362 #ifndef __WIN32__
1363         if (extract_flags & WIMLIB_EXTRACT_FLAG_WIMBOOT) {
1364                 ERROR("WIMBoot extraction is only supported on Windows!");
1365                 return WIMLIB_ERR_UNSUPPORTED;
1366         }
1367 #endif
1368
1369         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
1370                               WIMLIB_EXTRACT_FLAG_NORPFIX |
1371                               WIMLIB_EXTRACT_FLAG_IMAGEMODE)) ==
1372                                         WIMLIB_EXTRACT_FLAG_IMAGEMODE)
1373         {
1374                 /* For full-image extraction, do reparse point fixups by default
1375                  * if the WIM header says they are enabled.  */
1376                 if (wim->hdr.flags & WIM_HDR_FLAG_RP_FIX)
1377                         extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX;
1378         }
1379
1380         *extract_flags_p = extract_flags;
1381         return 0;
1382 }
1383
1384 static u32
1385 get_wildcard_flags(int extract_flags)
1386 {
1387         u32 wildcard_flags = 0;
1388
1389         if (extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_GLOB)
1390                 wildcard_flags |= WILDCARD_FLAG_ERROR_IF_NO_MATCH;
1391         else
1392                 wildcard_flags |= WILDCARD_FLAG_WARN_IF_NO_MATCH;
1393
1394         if (default_ignore_case)
1395                 wildcard_flags |= WILDCARD_FLAG_CASE_INSENSITIVE;
1396
1397         return wildcard_flags;
1398 }
1399
1400 struct append_dentry_ctx {
1401         struct wim_dentry **dentries;
1402         size_t num_dentries;
1403         size_t num_alloc_dentries;
1404 };
1405
1406 static int
1407 append_dentry_cb(struct wim_dentry *dentry, void *_ctx)
1408 {
1409         struct append_dentry_ctx *ctx = _ctx;
1410
1411         if (ctx->num_dentries == ctx->num_alloc_dentries) {
1412                 struct wim_dentry **new_dentries;
1413                 size_t new_length;
1414
1415                 new_length = max(ctx->num_alloc_dentries + 8,
1416                                  ctx->num_alloc_dentries * 3 / 2);
1417                 new_dentries = REALLOC(ctx->dentries,
1418                                        new_length * sizeof(ctx->dentries[0]));
1419                 if (new_dentries == NULL)
1420                         return WIMLIB_ERR_NOMEM;
1421                 ctx->dentries = new_dentries;
1422                 ctx->num_alloc_dentries = new_length;
1423         }
1424         ctx->dentries[ctx->num_dentries++] = dentry;
1425         return 0;
1426 }
1427
1428 static int
1429 do_wimlib_extract_paths(WIMStruct *wim, int image, const tchar *target,
1430                         const tchar * const *paths, size_t num_paths,
1431                         int extract_flags)
1432 {
1433         int ret;
1434         struct wim_dentry **trees;
1435         size_t num_trees;
1436
1437         if (wim == NULL || target == NULL || target[0] == T('\0') ||
1438             (num_paths != 0 && paths == NULL))
1439                 return WIMLIB_ERR_INVALID_PARAM;
1440
1441         ret = check_extract_flags(wim, &extract_flags);
1442         if (ret)
1443                 return ret;
1444
1445         ret = select_wim_image(wim, image);
1446         if (ret)
1447                 return ret;
1448
1449         ret = wim_checksum_unhashed_streams(wim);
1450         if (ret)
1451                 return ret;
1452
1453         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_NTFS |
1454                               WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE)) ==
1455             (WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE))
1456         {
1457                 ret = mkdir_if_needed(target);
1458                 if (ret)
1459                         return ret;
1460         }
1461
1462         if (extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS) {
1463
1464                 struct append_dentry_ctx append_dentry_ctx = {
1465                         .dentries = NULL,
1466                         .num_dentries = 0,
1467                         .num_alloc_dentries = 0,
1468                 };
1469
1470                 u32 wildcard_flags = get_wildcard_flags(extract_flags);
1471
1472                 for (size_t i = 0; i < num_paths; i++) {
1473                         tchar *path = canonicalize_wim_path(paths[i]);
1474                         if (path == NULL) {
1475                                 ret = WIMLIB_ERR_NOMEM;
1476                                 trees = append_dentry_ctx.dentries;
1477                                 goto out_free_trees;
1478                         }
1479                         ret = expand_wildcard(wim, path,
1480                                               append_dentry_cb,
1481                                               &append_dentry_ctx,
1482                                               wildcard_flags);
1483                         FREE(path);
1484                         if (ret) {
1485                                 trees = append_dentry_ctx.dentries;
1486                                 goto out_free_trees;
1487                         }
1488                 }
1489                 trees = append_dentry_ctx.dentries;
1490                 num_trees = append_dentry_ctx.num_dentries;
1491         } else {
1492                 trees = MALLOC(num_paths * sizeof(trees[0]));
1493                 if (trees == NULL)
1494                         return WIMLIB_ERR_NOMEM;
1495
1496                 for (size_t i = 0; i < num_paths; i++) {
1497
1498                         tchar *path = canonicalize_wim_path(paths[i]);
1499                         if (path == NULL) {
1500                                 ret = WIMLIB_ERR_NOMEM;
1501                                 goto out_free_trees;
1502                         }
1503
1504                         trees[i] = get_dentry(wim, path,
1505                                               WIMLIB_CASE_PLATFORM_DEFAULT);
1506                         FREE(path);
1507                         if (trees[i] == NULL) {
1508                                   ERROR("Path \"%"TS"\" does not exist "
1509                                         "in WIM image %d",
1510                                         paths[i], wim->current_image);
1511                                   ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
1512                                   goto out_free_trees;
1513                         }
1514                 }
1515                 num_trees = num_paths;
1516         }
1517
1518         if (num_trees == 0) {
1519                 ret = 0;
1520                 goto out_free_trees;
1521         }
1522
1523         ret = extract_trees(wim, trees, num_trees, target, extract_flags);
1524 out_free_trees:
1525         FREE(trees);
1526         return ret;
1527 }
1528
1529 static int
1530 extract_single_image(WIMStruct *wim, int image,
1531                      const tchar *target, int extract_flags)
1532 {
1533         const tchar *path = WIMLIB_WIM_ROOT_PATH;
1534         extract_flags |= WIMLIB_EXTRACT_FLAG_IMAGEMODE;
1535         return do_wimlib_extract_paths(wim, image, target, &path, 1, extract_flags);
1536 }
1537
1538 static const tchar * const filename_forbidden_chars =
1539 T(
1540 #ifdef __WIN32__
1541 "<>:\"/\\|?*"
1542 #else
1543 "/"
1544 #endif
1545 );
1546
1547 /* This function checks if it is okay to use a WIM image's name as a directory
1548  * name.  */
1549 static bool
1550 image_name_ok_as_dir(const tchar *image_name)
1551 {
1552         return image_name && *image_name &&
1553                 !tstrpbrk(image_name, filename_forbidden_chars) &&
1554                 tstrcmp(image_name, T(".")) &&
1555                 tstrcmp(image_name, T(".."));
1556 }
1557
1558 /* Extracts all images from the WIM to the directory @target, with the images
1559  * placed in subdirectories named by their image names. */
1560 static int
1561 extract_all_images(WIMStruct *wim, const tchar *target, int extract_flags)
1562 {
1563         size_t image_name_max_len = max(xml_get_max_image_name_len(wim), 20);
1564         size_t output_path_len = tstrlen(target);
1565         tchar buf[output_path_len + 1 + image_name_max_len + 1];
1566         int ret;
1567         int image;
1568         const tchar *image_name;
1569
1570         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
1571                 ERROR("Cannot extract multiple images in NTFS extraction mode.");
1572                 return WIMLIB_ERR_INVALID_PARAM;
1573         }
1574
1575         ret = mkdir_if_needed(target);
1576         if (ret)
1577                 return ret;
1578         tmemcpy(buf, target, output_path_len);
1579         buf[output_path_len] = OS_PREFERRED_PATH_SEPARATOR;
1580         for (image = 1; image <= wim->hdr.image_count; image++) {
1581                 image_name = wimlib_get_image_name(wim, image);
1582                 if (image_name_ok_as_dir(image_name)) {
1583                         tstrcpy(buf + output_path_len + 1, image_name);
1584                 } else {
1585                         /* Image name is empty or contains forbidden characters.
1586                          * Use image number instead. */
1587                         tsprintf(buf + output_path_len + 1, T("%d"), image);
1588                 }
1589                 ret = extract_single_image(wim, image, buf, extract_flags);
1590                 if (ret)
1591                         return ret;
1592         }
1593         return 0;
1594 }
1595
1596 static int
1597 do_wimlib_extract_image(WIMStruct *wim, int image, const tchar *target,
1598                         int extract_flags)
1599 {
1600         if (extract_flags & (WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE |
1601                              WIMLIB_EXTRACT_FLAG_TO_STDOUT |
1602                              WIMLIB_EXTRACT_FLAG_GLOB_PATHS))
1603                 return WIMLIB_ERR_INVALID_PARAM;
1604
1605         if (image == WIMLIB_ALL_IMAGES)
1606                 return extract_all_images(wim, target, extract_flags);
1607         else
1608                 return extract_single_image(wim, image, target, extract_flags);
1609 }
1610
1611
1612 /****************************************************************************
1613  *                          Extraction API                                  *
1614  ****************************************************************************/
1615
1616 WIMLIBAPI int
1617 wimlib_extract_paths(WIMStruct *wim, int image, const tchar *target,
1618                      const tchar * const *paths, size_t num_paths,
1619                      int extract_flags)
1620 {
1621         if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC)
1622                 return WIMLIB_ERR_INVALID_PARAM;
1623
1624         return do_wimlib_extract_paths(wim, image, target, paths, num_paths,
1625                                        extract_flags);
1626 }
1627
1628 WIMLIBAPI int
1629 wimlib_extract_pathlist(WIMStruct *wim, int image, const tchar *target,
1630                         const tchar *path_list_file, int extract_flags)
1631 {
1632         int ret;
1633         tchar **paths;
1634         size_t num_paths;
1635         void *mem;
1636
1637         ret = read_path_list_file(path_list_file, &paths, &num_paths, &mem);
1638         if (ret) {
1639                 ERROR("Failed to read path list file \"%"TS"\"",
1640                       path_list_file);
1641                 return ret;
1642         }
1643
1644         ret = wimlib_extract_paths(wim, image, target,
1645                                    (const tchar * const *)paths, num_paths,
1646                                    extract_flags);
1647         FREE(paths);
1648         FREE(mem);
1649         return ret;
1650 }
1651
1652 WIMLIBAPI int
1653 wimlib_extract_image_from_pipe_with_progress(int pipe_fd,
1654                                              const tchar *image_num_or_name,
1655                                              const tchar *target,
1656                                              int extract_flags,
1657                                              wimlib_progress_func_t progfunc,
1658                                              void *progctx)
1659 {
1660         int ret;
1661         WIMStruct *pwm;
1662         struct filedes *in_fd;
1663         int image;
1664         unsigned i;
1665
1666         if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC)
1667                 return WIMLIB_ERR_INVALID_PARAM;
1668
1669         /* Read the WIM header from the pipe and get a WIMStruct to represent
1670          * the pipable WIM.  Caveats:  Unlike getting a WIMStruct with
1671          * wimlib_open_wim(), getting a WIMStruct in this way will result in
1672          * an empty lookup table, no XML data read, and no filename set.  */
1673         ret = open_wim_as_WIMStruct(&pipe_fd, WIMLIB_OPEN_FLAG_FROM_PIPE, &pwm,
1674                                     progfunc, progctx);
1675         if (ret)
1676                 return ret;
1677
1678         /* Sanity check to make sure this is a pipable WIM.  */
1679         if (pwm->hdr.magic != PWM_MAGIC) {
1680                 ERROR("The WIM being read from file descriptor %d "
1681                       "is not pipable!", pipe_fd);
1682                 ret = WIMLIB_ERR_NOT_PIPABLE;
1683                 goto out_wimlib_free;
1684         }
1685
1686         /* Sanity check to make sure the first part of a pipable split WIM is
1687          * sent over the pipe first.  */
1688         if (pwm->hdr.part_number != 1) {
1689                 ERROR("The first part of the split WIM must be "
1690                       "sent over the pipe first.");
1691                 ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
1692                 goto out_wimlib_free;
1693         }
1694
1695         in_fd = &pwm->in_fd;
1696         wimlib_assert(in_fd->offset == WIM_HEADER_DISK_SIZE);
1697
1698         /* As mentioned, the WIMStruct we created from the pipe does not have
1699          * XML data yet.  Fix this by reading the extra copy of the XML data
1700          * that directly follows the header in pipable WIMs.  (Note: see
1701          * write_pipable_wim() for more details about the format of pipable
1702          * WIMs.)  */
1703         {
1704                 struct wim_lookup_table_entry xml_lte;
1705                 struct wim_resource_spec xml_rspec;
1706                 ret = read_pwm_stream_header(pwm, &xml_lte, &xml_rspec, 0, NULL);
1707                 if (ret)
1708                         goto out_wimlib_free;
1709
1710                 if (!(xml_lte.flags & WIM_RESHDR_FLAG_METADATA))
1711                 {
1712                         ERROR("Expected XML data, but found non-metadata "
1713                               "stream.");
1714                         ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
1715                         goto out_wimlib_free;
1716                 }
1717
1718                 wim_res_spec_to_hdr(&xml_rspec, &pwm->hdr.xml_data_reshdr);
1719
1720                 ret = read_wim_xml_data(pwm);
1721                 if (ret)
1722                         goto out_wimlib_free;
1723
1724                 if (wim_info_get_num_images(pwm->wim_info) != pwm->hdr.image_count) {
1725                         ERROR("Image count in XML data is not the same as in WIM header.");
1726                         ret = WIMLIB_ERR_IMAGE_COUNT;
1727                         goto out_wimlib_free;
1728                 }
1729         }
1730
1731         /* Get image index (this may use the XML data that was just read to
1732          * resolve an image name).  */
1733         if (image_num_or_name) {
1734                 image = wimlib_resolve_image(pwm, image_num_or_name);
1735                 if (image == WIMLIB_NO_IMAGE) {
1736                         ERROR("\"%"TS"\" is not a valid image in the pipable WIM!",
1737                               image_num_or_name);
1738                         ret = WIMLIB_ERR_INVALID_IMAGE;
1739                         goto out_wimlib_free;
1740                 } else if (image == WIMLIB_ALL_IMAGES) {
1741                         ERROR("Applying all images from a pipe is not supported!");
1742                         ret = WIMLIB_ERR_INVALID_IMAGE;
1743                         goto out_wimlib_free;
1744                 }
1745         } else {
1746                 if (pwm->hdr.image_count != 1) {
1747                         ERROR("No image was specified, but the pipable WIM "
1748                               "did not contain exactly 1 image");
1749                         ret = WIMLIB_ERR_INVALID_IMAGE;
1750                         goto out_wimlib_free;
1751                 }
1752                 image = 1;
1753         }
1754
1755         /* Load the needed metadata resource.  */
1756         for (i = 1; i <= pwm->hdr.image_count; i++) {
1757                 struct wim_lookup_table_entry *metadata_lte;
1758                 struct wim_image_metadata *imd;
1759                 struct wim_resource_spec *metadata_rspec;
1760
1761                 metadata_lte = new_lookup_table_entry();
1762                 if (metadata_lte == NULL) {
1763                         ret = WIMLIB_ERR_NOMEM;
1764                         goto out_wimlib_free;
1765                 }
1766                 metadata_rspec = MALLOC(sizeof(struct wim_resource_spec));
1767                 if (metadata_rspec == NULL) {
1768                         ret = WIMLIB_ERR_NOMEM;
1769                         free_lookup_table_entry(metadata_lte);
1770                         goto out_wimlib_free;
1771                 }
1772
1773                 ret = read_pwm_stream_header(pwm, metadata_lte, metadata_rspec, 0, NULL);
1774                 imd = pwm->image_metadata[i - 1];
1775                 imd->metadata_lte = metadata_lte;
1776                 if (ret) {
1777                         FREE(metadata_rspec);
1778                         goto out_wimlib_free;
1779                 }
1780
1781                 if (!(metadata_lte->flags & WIM_RESHDR_FLAG_METADATA)) {
1782                         ERROR("Expected metadata resource, but found "
1783                               "non-metadata stream.");
1784                         ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
1785                         goto out_wimlib_free;
1786                 }
1787
1788                 if (i == image) {
1789                         /* Metadata resource is for the image being extracted.
1790                          * Parse it and save the metadata in memory.  */
1791                         ret = read_metadata_resource(pwm, imd);
1792                         if (ret)
1793                                 goto out_wimlib_free;
1794                         imd->modified = 1;
1795                 } else {
1796                         /* Metadata resource is not for the image being
1797                          * extracted.  Skip over it.  */
1798                         ret = skip_wim_stream(metadata_lte);
1799                         if (ret)
1800                                 goto out_wimlib_free;
1801                 }
1802         }
1803         /* Extract the image.  */
1804         extract_flags |= WIMLIB_EXTRACT_FLAG_FROM_PIPE;
1805         ret = do_wimlib_extract_image(pwm, image, target, extract_flags);
1806         /* Clean up and return.  */
1807 out_wimlib_free:
1808         wimlib_free(pwm);
1809         return ret;
1810 }
1811
1812
1813 WIMLIBAPI int
1814 wimlib_extract_image_from_pipe(int pipe_fd, const tchar *image_num_or_name,
1815                                const tchar *target, int extract_flags)
1816 {
1817         return wimlib_extract_image_from_pipe_with_progress(pipe_fd,
1818                                                             image_num_or_name,
1819                                                             target,
1820                                                             extract_flags,
1821                                                             NULL,
1822                                                             NULL);
1823 }
1824
1825 WIMLIBAPI int
1826 wimlib_extract_image(WIMStruct *wim, int image, const tchar *target,
1827                      int extract_flags)
1828 {
1829         if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC)
1830                 return WIMLIB_ERR_INVALID_PARAM;
1831         return do_wimlib_extract_image(wim, image, target, extract_flags);
1832 }