83477f1337eb897cea1b5a41d90d8ddee8e0ac69
[wimlib] / src / extract.c
1 /*
2  * extract.c
3  *
4  * Support for extracting WIM images, or files or directories contained in a WIM
5  * image.
6  */
7
8 /*
9  * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
10  *
11  * This file is free software; you can redistribute it and/or modify it under
12  * the terms of the GNU Lesser General Public License as published by the Free
13  * Software Foundation; either version 3 of the License, or (at your option) any
14  * later version.
15  *
16  * This file is distributed in the hope that it will be useful, but WITHOUT
17  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
19  * details.
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * along with this file; if not, see http://www.gnu.org/licenses/.
23  */
24
25 /*
26  * This file provides the API functions wimlib_extract_image(),
27  * wimlib_extract_image_from_pipe(), wimlib_extract_paths(), and
28  * wimlib_extract_pathlist().  Internally, all end up calling
29  * do_wimlib_extract_paths() and extract_trees().
30  *
31  * Although wimlib supports multiple extraction modes/backends (NTFS-3g, UNIX,
32  * Win32), this file does not itself have code to extract files or directories
33  * to any specific target; instead, it handles generic functionality and relies
34  * on lower-level callback functions declared in `struct apply_operations' to do
35  * the actual extraction.
36  */
37
38 #ifdef HAVE_CONFIG_H
39 #  include "config.h"
40 #endif
41
42 #include <errno.h>
43 #include <fcntl.h>
44 #include <sys/stat.h>
45 #include <unistd.h>
46
47 #include "wimlib/apply.h"
48 #include "wimlib/assert.h"
49 #include "wimlib/blob_table.h"
50 #include "wimlib/dentry.h"
51 #include "wimlib/encoding.h"
52 #include "wimlib/endianness.h"
53 #include "wimlib/error.h"
54 #include "wimlib/metadata.h"
55 #include "wimlib/pathlist.h"
56 #include "wimlib/paths.h"
57 #include "wimlib/reparse.h"
58 #include "wimlib/resource.h"
59 #include "wimlib/security.h"
60 #include "wimlib/unix_data.h"
61 #include "wimlib/wildcard.h"
62 #include "wimlib/wim.h"
63 #include "wimlib/win32.h" /* for realpath() equivalent */
64 #include "wimlib/xml.h"
65
/* Extraction flags defined locally in this file.  They use the two highest
 * bits of a 32-bit flag word and are not included in
 * WIMLIB_EXTRACT_MASK_PUBLIC below.  */
#define WIMLIB_EXTRACT_FLAG_FROM_PIPE   0x80000000
#define WIMLIB_EXTRACT_FLAG_IMAGEMODE   0x40000000

/* Bitwise OR of the extraction flags listed here.
 * Keep in sync with wimlib.h  */
#define WIMLIB_EXTRACT_MASK_PUBLIC                              \
        ( WIMLIB_EXTRACT_FLAG_NTFS                              \
        | WIMLIB_EXTRACT_FLAG_UNIX_DATA                         \
        | WIMLIB_EXTRACT_FLAG_NO_ACLS                           \
        | WIMLIB_EXTRACT_FLAG_STRICT_ACLS                       \
        | WIMLIB_EXTRACT_FLAG_RPFIX                             \
        | WIMLIB_EXTRACT_FLAG_NORPFIX                           \
        | WIMLIB_EXTRACT_FLAG_TO_STDOUT                         \
        | WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES         \
        | WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS                \
        | WIMLIB_EXTRACT_FLAG_STRICT_TIMESTAMPS                 \
        | WIMLIB_EXTRACT_FLAG_STRICT_SHORT_NAMES                \
        | WIMLIB_EXTRACT_FLAG_STRICT_SYMLINKS                   \
        | WIMLIB_EXTRACT_FLAG_GLOB_PATHS                        \
        | WIMLIB_EXTRACT_FLAG_STRICT_GLOB                       \
        | WIMLIB_EXTRACT_FLAG_NO_ATTRIBUTES                     \
        | WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE         \
        | WIMLIB_EXTRACT_FLAG_WIMBOOT )
88
89 /* Send WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE or
90  * WIMLIB_PROGRESS_MSG_EXTRACT_METADATA.  */
91 int
92 do_file_extract_progress(struct apply_ctx *ctx, enum wimlib_progress_msg msg)
93 {
94         ctx->count_until_file_progress = 500;  /* Arbitrary value to limit calls  */
95         return extract_progress(ctx, msg);
96 }
97
98 static int
99 start_file_phase(struct apply_ctx *ctx, u64 end_file_count, enum wimlib_progress_msg msg)
100 {
101         ctx->progress.extract.current_file_count = 0;
102         ctx->progress.extract.end_file_count = end_file_count;
103         return do_file_extract_progress(ctx, msg);
104 }
105
106 int
107 start_file_structure_phase(struct apply_ctx *ctx, u64 end_file_count)
108 {
109         return start_file_phase(ctx, end_file_count, WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE);
110 }
111
112 int
113 start_file_metadata_phase(struct apply_ctx *ctx, u64 end_file_count)
114 {
115         return start_file_phase(ctx, end_file_count, WIMLIB_PROGRESS_MSG_EXTRACT_METADATA);
116 }
117
118 static int
119 end_file_phase(struct apply_ctx *ctx, enum wimlib_progress_msg msg)
120 {
121         ctx->progress.extract.current_file_count = ctx->progress.extract.end_file_count;
122         return do_file_extract_progress(ctx, msg);
123 }
124
125 int
126 end_file_structure_phase(struct apply_ctx *ctx)
127 {
128         return end_file_phase(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE);
129 }
130
131 int
132 end_file_metadata_phase(struct apply_ctx *ctx)
133 {
134         return end_file_phase(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_METADATA);
135 }
136
137 #define PWM_ALLOW_WIM_HDR 0x00001
138
139 /* Read the header for a blob in a pipable WIM.  */
140 static int
141 read_pwm_blob_header(WIMStruct *pwm, struct blob_descriptor *blob,
142                      struct wim_resource_descriptor *rdesc,
143                      int flags, struct wim_header_disk *hdr_ret)
144 {
145         union {
146                 struct pwm_blob_hdr blob_hdr;
147                 struct wim_header_disk pwm_hdr;
148         } buf;
149         struct wim_reshdr reshdr;
150         int ret;
151
152         ret = full_read(&pwm->in_fd, &buf.blob_hdr, sizeof(buf.blob_hdr));
153         if (ret)
154                 goto read_error;
155
156         if ((flags & PWM_ALLOW_WIM_HDR) &&
157             le64_to_cpu(buf.blob_hdr.magic) == PWM_MAGIC)
158         {
159                 BUILD_BUG_ON(sizeof(buf.pwm_hdr) < sizeof(buf.blob_hdr));
160                 ret = full_read(&pwm->in_fd, &buf.blob_hdr + 1,
161                                 sizeof(buf.pwm_hdr) - sizeof(buf.blob_hdr));
162
163                 if (ret)
164                         goto read_error;
165                 blob->blob_location = BLOB_NONEXISTENT;
166                 memcpy(hdr_ret, &buf.pwm_hdr, sizeof(buf.pwm_hdr));
167                 return 0;
168         }
169
170         if (le64_to_cpu(buf.blob_hdr.magic) != PWM_BLOB_MAGIC) {
171                 ERROR("Data read on pipe is invalid (expected blob header).");
172                 return WIMLIB_ERR_INVALID_PIPABLE_WIM;
173         }
174
175         copy_hash(blob->hash, buf.blob_hdr.hash);
176
177         reshdr.size_in_wim = 0;
178         reshdr.flags = le32_to_cpu(buf.blob_hdr.flags);
179         reshdr.offset_in_wim = pwm->in_fd.offset;
180         reshdr.uncompressed_size = le64_to_cpu(buf.blob_hdr.uncompressed_size);
181         wim_res_hdr_to_desc(&reshdr, pwm, rdesc);
182         blob_set_is_located_in_nonsolid_wim_resource(blob, rdesc);
183         blob->is_metadata = (rdesc->flags & WIM_RESHDR_FLAG_METADATA) != 0;
184
185         if (unlikely(blob->size == 0))
186                 return WIMLIB_ERR_INVALID_PIPABLE_WIM;
187
188         return 0;
189
190 read_error:
191         ERROR_WITH_ERRNO("Error reading pipable WIM from pipe");
192         return ret;
193 }
194
195 static int
196 read_blobs_from_pipe(struct apply_ctx *ctx,
197                      const struct read_blob_list_callbacks *cbs)
198 {
199         struct blob_descriptor *found_blob = NULL;
200         struct wim_resource_descriptor *rdesc = NULL;
201         struct blob_table *blob_table;
202         int ret;
203
204         ret = WIMLIB_ERR_NOMEM;
205         found_blob = new_blob_descriptor();
206         if (!found_blob)
207                 goto out;
208
209         rdesc = MALLOC(sizeof(struct wim_resource_descriptor));
210         if (!rdesc)
211                 goto out;
212
213         blob_table = ctx->wim->blob_table;
214         memcpy(ctx->progress.extract.guid, ctx->wim->hdr.guid, WIM_GUID_LEN);
215         ctx->progress.extract.part_number = ctx->wim->hdr.part_number;
216         ctx->progress.extract.total_parts = ctx->wim->hdr.total_parts;
217         ret = extract_progress(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_SPWM_PART_BEGIN);
218         if (ret)
219                 goto out;
220
221         while (ctx->num_blobs_remaining) {
222                 struct wim_header_disk pwm_hdr;
223                 struct blob_descriptor *needed_blob;
224
225                 if (found_blob->blob_location != BLOB_NONEXISTENT)
226                         blob_unset_is_located_in_wim_resource(found_blob);
227                 ret = read_pwm_blob_header(ctx->wim, found_blob, rdesc,
228                                            PWM_ALLOW_WIM_HDR, &pwm_hdr);
229                 if (ret)
230                         goto out;
231
232                 if ((found_blob->blob_location != BLOB_NONEXISTENT)
233                     && !found_blob->is_metadata
234                     && (needed_blob = lookup_blob(blob_table, found_blob->hash))
235                     && (needed_blob->out_refcnt))
236                 {
237                         blob_unset_is_located_in_wim_resource(found_blob);
238                         blob_set_is_located_in_nonsolid_wim_resource(needed_blob, rdesc);
239
240                         ret = (*cbs->begin_blob)(needed_blob,
241                                                  cbs->begin_blob_ctx);
242                         if (ret) {
243                                 blob_unset_is_located_in_wim_resource(needed_blob);
244                                 goto out;
245                         }
246
247                         ret = extract_blob(needed_blob, needed_blob->size,
248                                            cbs->consume_chunk,
249                                            cbs->consume_chunk_ctx);
250
251                         ret = (*cbs->end_blob)(needed_blob, ret,
252                                                cbs->end_blob_ctx);
253                         blob_unset_is_located_in_wim_resource(needed_blob);
254                         if (ret)
255                                 goto out;
256                         ctx->num_blobs_remaining--;
257                 } else if (found_blob->blob_location != BLOB_NONEXISTENT) {
258                         ret = skip_wim_resource(found_blob->rdesc);
259                         if (ret)
260                                 goto out;
261                 } else {
262                         u16 part_number = le16_to_cpu(pwm_hdr.part_number);
263                         u16 total_parts = le16_to_cpu(pwm_hdr.total_parts);
264
265                         if (part_number != ctx->progress.extract.part_number ||
266                             total_parts != ctx->progress.extract.total_parts ||
267                             memcmp(pwm_hdr.guid, ctx->progress.extract.guid,
268                                    WIM_GUID_LEN))
269                         {
270                                 ctx->progress.extract.part_number = part_number;
271                                 ctx->progress.extract.total_parts = total_parts;
272                                 memcpy(ctx->progress.extract.guid,
273                                        pwm_hdr.guid, WIM_GUID_LEN);
274                                 ret = extract_progress(ctx,
275                                                        WIMLIB_PROGRESS_MSG_EXTRACT_SPWM_PART_BEGIN);
276                                 if (ret)
277                                         goto out;
278                         }
279                 }
280         }
281         ret = 0;
282 out:
283         if (found_blob && found_blob->blob_location != BLOB_IN_WIM)
284                 FREE(rdesc);
285         free_blob_descriptor(found_blob);
286         return ret;
287 }
288
289 /* Creates a temporary file opened for writing.  The open file descriptor is
290  * returned in @fd_ret and its name is returned in @name_ret (dynamically
291  * allocated).  */
292 static int
293 create_temporary_file(struct filedes *fd_ret, tchar **name_ret)
294 {
295         tchar *name;
296         int open_flags;
297         int raw_fd;
298
299 retry:
300         name = ttempnam(NULL, T("wimlib"));
301         if (!name) {
302                 ERROR_WITH_ERRNO("Failed to create temporary filename");
303                 return WIMLIB_ERR_NOMEM;
304         }
305
306         open_flags = O_WRONLY | O_CREAT | O_EXCL | O_BINARY;
307 #ifdef __WIN32__
308         open_flags |= _O_SHORT_LIVED;
309 #endif
310         raw_fd = topen(name, open_flags, 0600);
311
312         if (raw_fd < 0) {
313                 if (errno == EEXIST) {
314                         FREE(name);
315                         goto retry;
316                 }
317                 ERROR_WITH_ERRNO("Failed to create temporary file "
318                                  "\"%"TS"\"", name);
319                 FREE(name);
320                 return WIMLIB_ERR_OPEN;
321         }
322
323         filedes_init(fd_ret, raw_fd);
324         *name_ret = name;
325         return 0;
326 }
327
328 static int
329 begin_extract_blob_wrapper(struct blob_descriptor *blob, void *_ctx)
330 {
331         struct apply_ctx *ctx = _ctx;
332
333         ctx->cur_blob = blob;
334         ctx->cur_blob_offset = 0;
335
336         if (unlikely(blob->out_refcnt > MAX_OPEN_FILES))
337                 return create_temporary_file(&ctx->tmpfile_fd, &ctx->tmpfile_name);
338         else
339                 return (*ctx->saved_cbs->begin_blob)(blob, ctx->saved_cbs->begin_blob_ctx);
340 }
341
342 static int
343 extract_chunk_wrapper(const void *chunk, size_t size, void *_ctx)
344 {
345         struct apply_ctx *ctx = _ctx;
346         union wimlib_progress_info *progress = &ctx->progress;
347         int ret;
348
349         ctx->cur_blob_offset += size;
350
351         if (likely(ctx->supported_features.hard_links)) {
352                 progress->extract.completed_bytes +=
353                         (u64)size * ctx->cur_blob->out_refcnt;
354                 if (ctx->cur_blob_offset == ctx->cur_blob->size)
355                         progress->extract.completed_streams += ctx->cur_blob->out_refcnt;
356         } else {
357                 const struct blob_extraction_target *targets =
358                         blob_extraction_targets(ctx->cur_blob);
359                 for (u32 i = 0; i < ctx->cur_blob->out_refcnt; i++) {
360                         const struct wim_inode *inode = targets[i].inode;
361                         const struct wim_dentry *dentry;
362
363                         list_for_each_entry(dentry,
364                                             &inode->i_extraction_aliases,
365                                             d_extraction_alias_node)
366                         {
367                                 progress->extract.completed_bytes += size;
368                                 if (ctx->cur_blob_offset == ctx->cur_blob->size)
369                                         progress->extract.completed_streams++;
370                         }
371                 }
372         }
373         if (progress->extract.completed_bytes >= ctx->next_progress) {
374
375                 ret = extract_progress(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS);
376                 if (ret)
377                         return ret;
378
379                 if (progress->extract.completed_bytes >=
380                     progress->extract.total_bytes)
381                 {
382                         ctx->next_progress = UINT64_MAX;
383                 } else {
384                         /* Send new message as soon as another 1/128 of the
385                          * total has been extracted.  (Arbitrary number.)  */
386                         ctx->next_progress =
387                                 progress->extract.completed_bytes +
388                                         progress->extract.total_bytes / 128;
389
390                         /* ... Unless that would be more than 5000000 bytes, in
391                          * which case send the next after the next 5000000
392                          * bytes.  (Another arbitrary number.)  */
393                         if (progress->extract.completed_bytes + 5000000 <
394                             ctx->next_progress)
395                                 ctx->next_progress =
396                                         progress->extract.completed_bytes + 5000000;
397
398                         /* ... But always send a message as soon as we're
399                          * completely done.  */
400                         if (progress->extract.total_bytes < ctx->next_progress)
401                                 ctx->next_progress = progress->extract.total_bytes;
402                 }
403         }
404
405         if (unlikely(filedes_valid(&ctx->tmpfile_fd))) {
406                 /* Just extracting to temporary file for now.  */
407                 ret = full_write(&ctx->tmpfile_fd, chunk, size);
408                 if (ret) {
409                         ERROR_WITH_ERRNO("Error writing data to "
410                                          "temporary file \"%"TS"\"",
411                                          ctx->tmpfile_name);
412                 }
413                 return ret;
414         } else {
415                 return (*ctx->saved_cbs->consume_chunk)(chunk, size,
416                                                         ctx->saved_cbs->consume_chunk_ctx);
417         }
418 }
419
420 static int
421 extract_from_tmpfile(const tchar *tmpfile_name, struct apply_ctx *ctx)
422 {
423         struct blob_descriptor tmpfile_blob;
424         struct blob_descriptor *orig_blob = ctx->cur_blob;
425         const struct read_blob_list_callbacks *cbs = ctx->saved_cbs;
426         int ret;
427         const u32 orig_refcnt = orig_blob->out_refcnt;
428
429         BUILD_BUG_ON(MAX_OPEN_FILES <
430                      ARRAY_LEN(orig_blob->inline_blob_extraction_targets));
431
432         struct blob_extraction_target *targets = orig_blob->blob_extraction_targets;
433
434         /* Copy the blob's data from the temporary file to each of its targets.
435          *
436          * This is executed only in the very uncommon case that a blob is being
437          * extracted to more than MAX_OPEN_FILES targets!  */
438
439         memcpy(&tmpfile_blob, orig_blob, sizeof(struct blob_descriptor));
440         tmpfile_blob.blob_location = BLOB_IN_FILE_ON_DISK;
441         tmpfile_blob.file_on_disk = ctx->tmpfile_name;
442         ret = 0;
443         for (u32 i = 0; i < orig_refcnt; i++) {
444
445                 /* Note: it usually doesn't matter whether we pass the original
446                  * blob descriptor to callbacks provided by the extraction
447                  * backend as opposed to the tmpfile blob descriptor, since they
448                  * shouldn't actually read data from the blob other than through
449                  * the read_blob_prefix() call below.  But for
450                  * WIMLIB_EXTRACT_FLAG_WIMBOOT mode on Windows it does matter
451                  * because it needs access to the original WIM resource
452                  * descriptor in order to create the external backing reference.
453                  */
454
455                 orig_blob->out_refcnt = 1;
456                 orig_blob->inline_blob_extraction_targets[0] = targets[i];
457
458                 ret = (*cbs->begin_blob)(orig_blob, cbs->begin_blob_ctx);
459                 if (ret)
460                         break;
461
462                 /* Extra SHA-1 isn't necessary here, but it shouldn't hurt as
463                  * this case is very rare anyway.  */
464                 ret = extract_blob(&tmpfile_blob, tmpfile_blob.size,
465                                    cbs->consume_chunk,
466                                    cbs->consume_chunk_ctx);
467
468                 ret = (*cbs->end_blob)(orig_blob, ret, cbs->end_blob_ctx);
469                 if (ret)
470                         break;
471         }
472         FREE(targets);
473         orig_blob->out_refcnt = 0;
474         return ret;
475 }
476
477 static int
478 end_extract_blob_wrapper(struct blob_descriptor *blob, int status, void *_ctx)
479 {
480         struct apply_ctx *ctx = _ctx;
481
482         if (unlikely(filedes_valid(&ctx->tmpfile_fd))) {
483                 filedes_close(&ctx->tmpfile_fd);
484                 if (!status)
485                         status = extract_from_tmpfile(ctx->tmpfile_name, ctx);
486                 filedes_invalidate(&ctx->tmpfile_fd);
487                 tunlink(ctx->tmpfile_name);
488                 FREE(ctx->tmpfile_name);
489                 return status;
490         } else {
491                 return (*ctx->saved_cbs->end_blob)(blob, status,
492                                                    ctx->saved_cbs->end_blob_ctx);
493         }
494 }
495
496 /*
497  * Read the list of blobs to extract and feed their data into the specified
498  * callback functions.
499  *
500  * This handles checksumming each blob.
501  *
502  * This also handles sending WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS.
503  *
504  * This also works if the WIM is being read from a pipe, whereas attempting to
505  * read blobs directly (e.g. with read_full_blob_into_buf()) will not.
506  *
507  * This also will split up blobs that will need to be extracted to more than
508  * MAX_OPEN_FILES locations, as measured by the 'out_refcnt' of each blob.
509  * Therefore, the apply_operations implementation need not worry about running
510  * out of file descriptors, unless it might open more than one file descriptor
511  * per 'blob_extraction_target' (e.g. Win32 currently might because the
512  * destination file system might not support hard links).
513  */
514 int
515 extract_blob_list(struct apply_ctx *ctx,
516                   const struct read_blob_list_callbacks *cbs)
517 {
518         struct read_blob_list_callbacks wrapper_cbs = {
519                 .begin_blob        = begin_extract_blob_wrapper,
520                 .begin_blob_ctx    = ctx,
521                 .consume_chunk     = extract_chunk_wrapper,
522                 .consume_chunk_ctx = ctx,
523                 .end_blob          = end_extract_blob_wrapper,
524                 .end_blob_ctx      = ctx,
525         };
526         ctx->saved_cbs = cbs;
527         if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
528                 return read_blobs_from_pipe(ctx, &wrapper_cbs);
529         } else {
530                 return read_blob_list(&ctx->blob_list,
531                                       offsetof(struct blob_descriptor,
532                                                extraction_list),
533                                       &wrapper_cbs, VERIFY_BLOB_HASHES);
534         }
535 }
536
537 /* Extract a WIM dentry to standard output.
538  *
539  * This obviously doesn't make sense in all cases.  We return an error if the
540  * dentry does not correspond to a regular file.  Otherwise we extract the
541  * unnamed data stream only.  */
542 static int
543 extract_dentry_to_stdout(struct wim_dentry *dentry,
544                          const struct blob_table *blob_table)
545 {
546         struct wim_inode *inode = dentry->d_inode;
547         struct blob_descriptor *blob;
548         struct filedes _stdout;
549
550         if (inode->i_attributes & (FILE_ATTRIBUTE_REPARSE_POINT |
551                                    FILE_ATTRIBUTE_DIRECTORY |
552                                    FILE_ATTRIBUTE_ENCRYPTED))
553         {
554                 ERROR("\"%"TS"\" is not a regular file and therefore cannot be "
555                       "extracted to standard output", dentry_full_path(dentry));
556                 return WIMLIB_ERR_NOT_A_REGULAR_FILE;
557         }
558
559         blob = inode_get_blob_for_unnamed_data_stream(inode, blob_table);
560         if (!blob) {
561                 const u8 *hash = inode_get_hash_of_unnamed_data_stream(inode);
562                 if (!is_zero_hash(hash))
563                         return blob_not_found_error(inode, hash);
564                 return 0;
565         }
566
567         filedes_init(&_stdout, STDOUT_FILENO);
568         return extract_full_blob_to_fd(blob, &_stdout);
569 }
570
/* Write each of the @num_dentries dentries in @dentries to standard output,
 * in array order, stopping at and returning the first nonzero status from
 * extract_dentry_to_stdout().  Returns 0 if all succeeded.  */
static int
extract_dentries_to_stdout(struct wim_dentry **dentries, size_t num_dentries,
                           const struct blob_table *blob_table)
{
        size_t idx = 0;

        while (idx < num_dentries) {
                int status = extract_dentry_to_stdout(dentries[idx], blob_table);

                if (status != 0)
                        return status;
                idx++;
        }
        return 0;
}
582
583 /**********************************************************************/
584
585 /*
586  * Removes duplicate dentries from the array.
587  *
588  * Returns the new number of dentries, packed at the front of the array.
589  */
590 static size_t
591 remove_duplicate_trees(struct wim_dentry **trees, size_t num_trees)
592 {
593         size_t i, j = 0;
594         for (i = 0; i < num_trees; i++) {
595                 if (!trees[i]->tmp_flag) {
596                         /* Found distinct dentry.  */
597                         trees[i]->tmp_flag = 1;
598                         trees[j++] = trees[i];
599                 }
600         }
601         for (i = 0; i < j; i++)
602                 trees[i]->tmp_flag = 0;
603         return j;
604 }
605
606 /*
607  * Remove dentries that are descendants of other dentries in the array.
608  *
609  * Returns the new number of dentries, packed at the front of the array.
610  */
611 static size_t
612 remove_contained_trees(struct wim_dentry **trees, size_t num_trees)
613 {
614         size_t i, j = 0;
615         for (i = 0; i < num_trees; i++)
616                 trees[i]->tmp_flag = 1;
617         for (i = 0; i < num_trees; i++) {
618                 struct wim_dentry *d = trees[i];
619                 while (!dentry_is_root(d)) {
620                         d = d->d_parent;
621                         if (d->tmp_flag)
622                                 goto tree_contained;
623                 }
624                 trees[j++] = trees[i];
625                 continue;
626
627         tree_contained:
628                 trees[i]->tmp_flag = 0;
629         }
630
631         for (i = 0; i < j; i++)
632                 trees[i]->tmp_flag = 0;
633         return j;
634 }
635
636 static int
637 dentry_append_to_list(struct wim_dentry *dentry, void *_dentry_list)
638 {
639         struct list_head *dentry_list = _dentry_list;
640         list_add_tail(&dentry->d_extraction_list_node, dentry_list);
641         return 0;
642 }
643
644 static void
645 dentry_reset_extraction_list_node(struct wim_dentry *dentry)
646 {
647         dentry->d_extraction_list_node = (struct list_head){NULL, NULL};
648 }
649
650 static int
651 dentry_delete_from_list(struct wim_dentry *dentry, void *_ignore)
652 {
653         list_del(&dentry->d_extraction_list_node);
654         dentry_reset_extraction_list_node(dentry);
655         return 0;
656 }
657
658 /*
659  * Build the preliminary list of dentries to be extracted.
660  *
661  * The list maintains the invariant that if d1 and d2 are in the list and d1 is
662  * an ancestor of d2, then d1 appears before d2 in the list.
663  */
664 static void
665 build_dentry_list(struct list_head *dentry_list, struct wim_dentry **trees,
666                   size_t num_trees, bool add_ancestors)
667 {
668         INIT_LIST_HEAD(dentry_list);
669
670         /* Add the trees recursively.  */
671         for (size_t i = 0; i < num_trees; i++)
672                 for_dentry_in_tree(trees[i], dentry_append_to_list, dentry_list);
673
674         /* If requested, add ancestors of the trees.  */
675         if (add_ancestors) {
676                 for (size_t i = 0; i < num_trees; i++) {
677                         struct wim_dentry *dentry = trees[i];
678                         struct wim_dentry *ancestor;
679                         struct list_head *place_after;
680
681                         if (dentry_is_root(dentry))
682                                 continue;
683
684                         place_after = dentry_list;
685                         ancestor = dentry;
686                         do {
687                                 ancestor = ancestor->d_parent;
688                                 if (will_extract_dentry(ancestor)) {
689                                         place_after = &ancestor->d_extraction_list_node;
690                                         break;
691                                 }
692                         } while (!dentry_is_root(ancestor));
693
694                         ancestor = dentry;
695                         do {
696                                 ancestor = ancestor->d_parent;
697                                 if (will_extract_dentry(ancestor))
698                                         break;
699                                 list_add(&ancestor->d_extraction_list_node, place_after);
700                         } while (!dentry_is_root(ancestor));
701                 }
702         }
703 }
704
705 static void
706 destroy_dentry_list(struct list_head *dentry_list)
707 {
708         struct wim_dentry *dentry, *tmp;
709         struct wim_inode *inode;
710
711         list_for_each_entry_safe(dentry, tmp, dentry_list, d_extraction_list_node) {
712                 inode = dentry->d_inode;
713                 dentry_reset_extraction_list_node(dentry);
714                 inode->i_visited = 0;
715                 inode->i_can_externally_back = 0;
716                 if ((void *)dentry->d_extraction_name != (void *)dentry->file_name)
717                         FREE(dentry->d_extraction_name);
718                 dentry->d_extraction_name = NULL;
719                 dentry->d_extraction_name_nchars = 0;
720         }
721 }
722
723 static void
724 destroy_blob_list(struct list_head *blob_list)
725 {
726         struct blob_descriptor *blob;
727
728         list_for_each_entry(blob, blob_list, extraction_list)
729                 if (blob->out_refcnt > ARRAY_LEN(blob->inline_blob_extraction_targets))
730                         FREE(blob->blob_extraction_targets);
731 }
732
733 #ifdef __WIN32__
734 static const utf16lechar replacement_char = cpu_to_le16(0xfffd);
735 #else
736 static const utf16lechar replacement_char = cpu_to_le16('?');
737 #endif
738
739 static bool
740 file_name_valid(utf16lechar *name, size_t num_chars, bool fix)
741 {
742         size_t i;
743
744         if (num_chars == 0)
745                 return true;
746         for (i = 0; i < num_chars; i++) {
747                 switch (name[i]) {
748         #ifdef __WIN32__
749                 case cpu_to_le16('\\'):
750                 case cpu_to_le16(':'):
751                 case cpu_to_le16('*'):
752                 case cpu_to_le16('?'):
753                 case cpu_to_le16('"'):
754                 case cpu_to_le16('<'):
755                 case cpu_to_le16('>'):
756                 case cpu_to_le16('|'):
757         #endif
758                 case cpu_to_le16('/'):
759                 case cpu_to_le16('\0'):
760                         if (fix)
761                                 name[i] = replacement_char;
762                         else
763                                 return false;
764                 }
765         }
766
767 #ifdef __WIN32__
768         if (name[num_chars - 1] == cpu_to_le16(' ') ||
769             name[num_chars - 1] == cpu_to_le16('.'))
770         {
771                 if (fix)
772                         name[num_chars - 1] = replacement_char;
773                 else
774                         return false;
775         }
776 #endif
777         return true;
778 }
779
780 static int
781 dentry_calculate_extraction_name(struct wim_dentry *dentry,
782                                  struct apply_ctx *ctx)
783 {
784         int ret;
785
786         if (dentry_is_root(dentry))
787                 return 0;
788
789 #ifdef WITH_NTFS_3G
790         if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
791                 dentry->d_extraction_name = dentry->file_name;
792                 dentry->d_extraction_name_nchars = dentry->file_name_nbytes /
793                                                    sizeof(utf16lechar);
794                 return 0;
795         }
796 #endif
797
798         if (!ctx->supported_features.case_sensitive_filenames) {
799                 struct wim_dentry *other;
800                 list_for_each_entry(other, &dentry->d_ci_conflict_list,
801                                     d_ci_conflict_list)
802                 {
803                         if (will_extract_dentry(other)) {
804                                 if (ctx->extract_flags &
805                                     WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS) {
806                                         WARNING("\"%"TS"\" has the same "
807                                                 "case-insensitive name as "
808                                                 "\"%"TS"\"; extracting "
809                                                 "dummy name instead",
810                                                 dentry_full_path(dentry),
811                                                 dentry_full_path(other));
812                                         goto out_replace;
813                                 } else {
814                                         WARNING("Not extracting \"%"TS"\": "
815                                                 "has same case-insensitive "
816                                                 "name as \"%"TS"\"",
817                                                 dentry_full_path(dentry),
818                                                 dentry_full_path(other));
819                                         goto skip_dentry;
820                                 }
821                         }
822                 }
823         }
824
825         if (file_name_valid(dentry->file_name, dentry->file_name_nbytes / 2, false)) {
826                 ret = utf16le_get_tstr(dentry->file_name,
827                                        dentry->file_name_nbytes,
828                                        (const tchar **)&dentry->d_extraction_name,
829                                        &dentry->d_extraction_name_nchars);
830                 dentry->d_extraction_name_nchars /= sizeof(tchar);
831                 return ret;
832         } else {
833                 if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES)
834                 {
835                         WARNING("\"%"TS"\" has an invalid filename "
836                                 "that is not supported on this platform; "
837                                 "extracting dummy name instead",
838                                 dentry_full_path(dentry));
839                         goto out_replace;
840                 } else {
841                         WARNING("Not extracting \"%"TS"\": has an invalid filename "
842                                 "that is not supported on this platform",
843                                 dentry_full_path(dentry));
844                         goto skip_dentry;
845                 }
846         }
847
848 out_replace:
849         {
850                 utf16lechar utf16_name_copy[dentry->file_name_nbytes / 2];
851
852                 memcpy(utf16_name_copy, dentry->file_name, dentry->file_name_nbytes);
853                 file_name_valid(utf16_name_copy, dentry->file_name_nbytes / 2, true);
854
855                 const tchar *tchar_name;
856                 size_t tchar_nchars;
857
858                 ret = utf16le_get_tstr(utf16_name_copy,
859                                        dentry->file_name_nbytes,
860                                        &tchar_name, &tchar_nchars);
861                 if (ret)
862                         return ret;
863
864                 tchar_nchars /= sizeof(tchar);
865
866                 size_t fixed_name_num_chars = tchar_nchars;
867                 tchar fixed_name[tchar_nchars + 50];
868
869                 tmemcpy(fixed_name, tchar_name, tchar_nchars);
870                 fixed_name_num_chars += tsprintf(fixed_name + tchar_nchars,
871                                                  T(" (invalid filename #%lu)"),
872                                                  ++ctx->invalid_sequence);
873
874                 utf16le_put_tstr(tchar_name);
875
876                 dentry->d_extraction_name = TSTRDUP(fixed_name);
877                 if (!dentry->d_extraction_name)
878                         return WIMLIB_ERR_NOMEM;
879                 dentry->d_extraction_name_nchars = fixed_name_num_chars;
880         }
881         return 0;
882
883 skip_dentry:
884         for_dentry_in_tree(dentry, dentry_delete_from_list, NULL);
885         return 0;
886 }
887
888 /*
889  * Calculate the actual filename component at which each WIM dentry will be
890  * extracted, with special handling for dentries that are unsupported by the
891  * extraction backend or have invalid names.
892  *
893  * ctx->supported_features must be filled in.
894  *
895  * Possible error codes: WIMLIB_ERR_NOMEM, WIMLIB_ERR_INVALID_UTF16_STRING
896  */
897 static int
898 dentry_list_calculate_extraction_names(struct list_head *dentry_list,
899                                        struct apply_ctx *ctx)
900 {
901         struct list_head *prev, *cur;
902
903         /* Can't use list_for_each_entry() because a call to
904          * dentry_calculate_extraction_name() may delete the current dentry and
905          * its children from the list.  */
906
907         prev = dentry_list;
908         for (;;) {
909                 struct wim_dentry *dentry;
910                 int ret;
911
912                 cur = prev->next;
913                 if (cur == dentry_list)
914                         break;
915
916                 dentry = list_entry(cur, struct wim_dentry, d_extraction_list_node);
917
918                 ret = dentry_calculate_extraction_name(dentry, ctx);
919                 if (ret)
920                         return ret;
921
922                 if (prev->next == cur)
923                         prev = cur;
924                 else
925                         ; /* Current dentry and its children (which follow in
926                              the list) were deleted.  prev stays the same.  */
927         }
928         return 0;
929 }
930
931 static int
932 dentry_resolve_streams(struct wim_dentry *dentry, int extract_flags,
933                        struct blob_table *blob_table)
934 {
935         struct wim_inode *inode = dentry->d_inode;
936         struct blob_descriptor *blob;
937         int ret;
938         bool force = false;
939
940         /* Special case:  when extracting from a pipe, the WIM blob table is
941          * initially empty, so "resolving" an inode's streams is initially not
942          * possible.  However, we still need to keep track of which blobs,
943          * identified by SHA-1 message digests, need to be extracted, so we
944          * "resolve" the inode's streams anyway by allocating a 'struct
945          * blob_descriptor' for each one.  */
946         if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE)
947                 force = true;
948         ret = inode_resolve_streams(inode, blob_table, force);
949         if (ret)
950                 return ret;
951         for (unsigned i = 0; i < inode->i_num_streams; i++) {
952                 blob = stream_blob_resolved(&inode->i_streams[i]);
953                 if (blob)
954                         blob->out_refcnt = 0;
955         }
956         return 0;
957 }
958
959 /*
960  * For each dentry to be extracted, resolve all streams in the corresponding
961  * inode and set 'out_refcnt' in all referenced blob_descriptors to 0.
962  *
963  * Possible error codes: WIMLIB_ERR_RESOURCE_NOT_FOUND, WIMLIB_ERR_NOMEM.
964  */
965 static int
966 dentry_list_resolve_streams(struct list_head *dentry_list,
967                             struct apply_ctx *ctx)
968 {
969         struct wim_dentry *dentry;
970         int ret;
971
972         list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
973                 ret = dentry_resolve_streams(dentry,
974                                              ctx->extract_flags,
975                                              ctx->wim->blob_table);
976                 if (ret)
977                         return ret;
978         }
979         return 0;
980 }
981
982 static int
983 ref_stream(struct wim_inode_stream *strm, struct wim_dentry *dentry,
984            struct apply_ctx *ctx)
985 {
986         struct wim_inode *inode = dentry->d_inode;
987         struct blob_descriptor *blob = stream_blob_resolved(strm);
988         struct blob_extraction_target *targets;
989
990         if (!blob)
991                 return 0;
992
993         /* Tally the size only for each actual extraction of the stream (not
994          * additional hard links to the inode).  */
995         if (inode->i_visited && ctx->supported_features.hard_links)
996                 return 0;
997
998         ctx->progress.extract.total_bytes += blob->size;
999         ctx->progress.extract.total_streams++;
1000
1001         if (inode->i_visited)
1002                 return 0;
1003
1004         /* Add each blob to 'ctx->blob_list' only one time, regardless of how
1005          * many extraction targets it will have.  */
1006         if (blob->out_refcnt == 0) {
1007                 list_add_tail(&blob->extraction_list, &ctx->blob_list);
1008                 ctx->num_blobs_remaining++;
1009         }
1010
1011         /* Set this stream as an extraction target of 'blob'.  */
1012
1013         if (blob->out_refcnt < ARRAY_LEN(blob->inline_blob_extraction_targets)) {
1014                 targets = blob->inline_blob_extraction_targets;
1015         } else {
1016                 struct blob_extraction_target *prev_targets;
1017                 size_t alloc_blob_extraction_targets;
1018
1019                 if (blob->out_refcnt == ARRAY_LEN(blob->inline_blob_extraction_targets)) {
1020                         prev_targets = NULL;
1021                         alloc_blob_extraction_targets = ARRAY_LEN(blob->inline_blob_extraction_targets);
1022                 } else {
1023                         prev_targets = blob->blob_extraction_targets;
1024                         alloc_blob_extraction_targets = blob->alloc_blob_extraction_targets;
1025                 }
1026
1027                 if (blob->out_refcnt == alloc_blob_extraction_targets) {
1028                         alloc_blob_extraction_targets *= 2;
1029                         targets = REALLOC(prev_targets,
1030                                           alloc_blob_extraction_targets *
1031                                           sizeof(targets[0]));
1032                         if (!targets)
1033                                 return WIMLIB_ERR_NOMEM;
1034                         if (!prev_targets) {
1035                                 memcpy(targets,
1036                                        blob->inline_blob_extraction_targets,
1037                                        sizeof(blob->inline_blob_extraction_targets));
1038                         }
1039                         blob->blob_extraction_targets = targets;
1040                         blob->alloc_blob_extraction_targets = alloc_blob_extraction_targets;
1041                 }
1042                 targets = blob->blob_extraction_targets;
1043         }
1044         targets[blob->out_refcnt].inode = inode;
1045         targets[blob->out_refcnt].stream = strm;
1046         blob->out_refcnt++;
1047         return 0;
1048 }
1049
1050 static int
1051 ref_stream_if_needed(struct wim_dentry *dentry, struct wim_inode *inode,
1052                      struct wim_inode_stream *strm, struct apply_ctx *ctx)
1053 {
1054         bool need_stream = false;
1055         switch (strm->stream_type) {
1056         case STREAM_TYPE_DATA:
1057                 if (stream_is_named(strm)) {
1058                         /* Named data stream  */
1059                         if (ctx->supported_features.named_data_streams)
1060                                 need_stream = true;
1061                 } else if (!(inode->i_attributes & (FILE_ATTRIBUTE_DIRECTORY |
1062                                                     FILE_ATTRIBUTE_ENCRYPTED))
1063                            && !(inode_is_symlink(inode)
1064                                 && !ctx->supported_features.reparse_points
1065                                 && ctx->supported_features.symlink_reparse_points))
1066                 {
1067                         /*
1068                          * Unnamed data stream.  Skip if any of the following is true:
1069                          *
1070                          * - file is a directory
1071                          * - file is encrypted
1072                          * - backend needs to create the file as UNIX symlink
1073                          * - backend will extract the stream as externally backed
1074                          */
1075                         if (ctx->apply_ops->will_externally_back) {
1076                                 int ret = (*ctx->apply_ops->will_externally_back)(dentry, ctx);
1077                                 if (ret > 0) /* Error?  */
1078                                         return ret;
1079                                 if (ret < 0) /* Won't externally back?  */
1080                                         need_stream = true;
1081                         } else {
1082                                 need_stream = true;
1083                         }
1084                 }
1085                 break;
1086         case STREAM_TYPE_REPARSE_POINT:
1087                 wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
1088                 if (ctx->supported_features.reparse_points ||
1089                     (inode_is_symlink(inode) &&
1090                      ctx->supported_features.symlink_reparse_points))
1091                         need_stream = true;
1092                 break;
1093         case STREAM_TYPE_EFSRPC_RAW_DATA:
1094                 wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED);
1095                 if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) {
1096                         if (ctx->supported_features.encrypted_directories)
1097                                 need_stream = true;
1098                 } else {
1099                         if (ctx->supported_features.encrypted_files)
1100                                 need_stream = true;
1101                 }
1102                 break;
1103         }
1104         if (need_stream)
1105                 return ref_stream(strm, dentry, ctx);
1106         return 0;
1107 }
1108
1109 static int
1110 dentry_ref_streams(struct wim_dentry *dentry, struct apply_ctx *ctx)
1111 {
1112         struct wim_inode *inode = dentry->d_inode;
1113         for (unsigned i = 0; i < inode->i_num_streams; i++) {
1114                 int ret = ref_stream_if_needed(dentry, inode,
1115                                                &inode->i_streams[i], ctx);
1116                 if (ret)
1117                         return ret;
1118         }
1119         inode->i_visited = 1;
1120         return 0;
1121 }
1122
1123 /*
1124  * Given a list of dentries to be extracted, build the list of blobs that need
1125  * to be extracted, and for each blob determine the streams to which that blob
1126  * will be extracted.
1127  *
1128  * This also initializes the extract progress info with byte and blob
1129  * information.
1130  *
1131  * ctx->supported_features must be filled in.
1132  */
1133 static int
1134 dentry_list_ref_streams(struct list_head *dentry_list, struct apply_ctx *ctx)
1135 {
1136         struct wim_dentry *dentry;
1137         int ret;
1138
1139         list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
1140                 ret = dentry_ref_streams(dentry, ctx);
1141                 if (ret)
1142                         return ret;
1143         }
1144         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
1145                 dentry->d_inode->i_visited = 0;
1146         return 0;
1147 }
1148
1149 static void
1150 dentry_list_build_inode_alias_lists(struct list_head *dentry_list)
1151 {
1152         struct wim_dentry *dentry;
1153         struct wim_inode *inode;
1154
1155         list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
1156                 inode = dentry->d_inode;
1157                 if (!inode->i_visited)
1158                         INIT_LIST_HEAD(&inode->i_extraction_aliases);
1159                 list_add_tail(&dentry->d_extraction_alias_node,
1160                               &inode->i_extraction_aliases);
1161                 inode->i_visited = 1;
1162         }
1163         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
1164                 dentry->d_inode->i_visited = 0;
1165 }
1166
1167 static void
1168 inode_tally_features(const struct wim_inode *inode,
1169                      struct wim_features *features)
1170 {
1171         if (inode->i_attributes & FILE_ATTRIBUTE_ARCHIVE)
1172                 features->archive_files++;
1173         if (inode->i_attributes & FILE_ATTRIBUTE_HIDDEN)
1174                 features->hidden_files++;
1175         if (inode->i_attributes & FILE_ATTRIBUTE_SYSTEM)
1176                 features->system_files++;
1177         if (inode->i_attributes & FILE_ATTRIBUTE_COMPRESSED)
1178                 features->compressed_files++;
1179         if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) {
1180                 if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)
1181                         features->encrypted_directories++;
1182                 else
1183                         features->encrypted_files++;
1184         }
1185         if (inode->i_attributes & FILE_ATTRIBUTE_NOT_CONTENT_INDEXED)
1186                 features->not_context_indexed_files++;
1187         if (inode->i_attributes & FILE_ATTRIBUTE_SPARSE_FILE)
1188                 features->sparse_files++;
1189         if (inode_has_named_data_stream(inode))
1190                 features->named_data_streams++;
1191         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
1192                 features->reparse_points++;
1193                 if (inode_is_symlink(inode))
1194                         features->symlink_reparse_points++;
1195                 else
1196                         features->other_reparse_points++;
1197         }
1198         if (inode->i_security_id != -1)
1199                 features->security_descriptors++;
1200         if (inode_has_unix_data(inode))
1201                 features->unix_data++;
1202 }
1203
1204 /* Tally features necessary to extract a dentry and the corresponding inode.  */
1205 static void
1206 dentry_tally_features(struct wim_dentry *dentry, struct wim_features *features)
1207 {
1208         struct wim_inode *inode = dentry->d_inode;
1209
1210         if (dentry_has_short_name(dentry))
1211                 features->short_names++;
1212
1213         if (inode->i_visited) {
1214                 features->hard_links++;
1215         } else {
1216                 inode_tally_features(inode, features);
1217                 inode->i_visited = 1;
1218         }
1219 }
1220
1221 /* Tally the features necessary to extract the specified dentries.  */
1222 static void
1223 dentry_list_get_features(struct list_head *dentry_list,
1224                          struct wim_features *features)
1225 {
1226         struct wim_dentry *dentry;
1227
1228         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
1229                 dentry_tally_features(dentry, features);
1230
1231         list_for_each_entry(dentry, dentry_list, d_extraction_list_node)
1232                 dentry->d_inode->i_visited = 0;
1233 }
1234
1235 static int
1236 do_feature_check(const struct wim_features *required_features,
1237                  const struct wim_features *supported_features,
1238                  int extract_flags)
1239 {
1240         /* Encrypted files.  */
1241         if (required_features->encrypted_files &&
1242             !supported_features->encrypted_files)
1243                 WARNING("Ignoring EFS-encrypted data of %lu files",
1244                         required_features->encrypted_files);
1245
1246         /* Named data streams.  */
1247         if (required_features->named_data_streams &&
1248             !supported_features->named_data_streams)
1249                 WARNING("Ignoring named data streams of %lu files",
1250                         required_features->named_data_streams);
1251
1252         /* File attributes.  */
1253         if (!(extract_flags & WIMLIB_EXTRACT_FLAG_NO_ATTRIBUTES)) {
1254                 /* Note: Don't bother the user about FILE_ATTRIBUTE_ARCHIVE.
1255                  * We're an archive program, so theoretically we can do what we
1256                  * want with it.  */
1257
1258                 if (required_features->hidden_files &&
1259                     !supported_features->hidden_files)
1260                         WARNING("Ignoring FILE_ATTRIBUTE_HIDDEN of %lu files",
1261                                 required_features->hidden_files);
1262
1263                 if (required_features->system_files &&
1264                     !supported_features->system_files)
1265                         WARNING("Ignoring FILE_ATTRIBUTE_SYSTEM of %lu files",
1266                                 required_features->system_files);
1267
1268                 if (required_features->compressed_files &&
1269                     !supported_features->compressed_files)
1270                         WARNING("Ignoring FILE_ATTRIBUTE_COMPRESSED of %lu files",
1271                                 required_features->compressed_files);
1272
1273                 if (required_features->not_context_indexed_files &&
1274                     !supported_features->not_context_indexed_files)
1275                         WARNING("Ignoring FILE_ATTRIBUTE_NOT_CONTENT_INDEXED of %lu files",
1276                                 required_features->not_context_indexed_files);
1277
1278                 if (required_features->sparse_files &&
1279                     !supported_features->sparse_files)
1280                         WARNING("Ignoring FILE_ATTRIBUTE_SPARSE_FILE of %lu files",
1281                                 required_features->sparse_files);
1282
1283                 if (required_features->encrypted_directories &&
1284                     !supported_features->encrypted_directories)
1285                         WARNING("Ignoring FILE_ATTRIBUTE_ENCRYPTED of %lu directories",
1286                                 required_features->encrypted_directories);
1287         }
1288
1289         /* Hard links.  */
1290         if (required_features->hard_links && !supported_features->hard_links)
1291                 WARNING("Extracting %lu hard links as independent files",
1292                         required_features->hard_links);
1293
1294         /* Symbolic links and reparse points.  */
1295         if ((extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_SYMLINKS) &&
1296             required_features->symlink_reparse_points &&
1297             !supported_features->symlink_reparse_points &&
1298             !supported_features->reparse_points)
1299         {
1300                 ERROR("Extraction backend does not support symbolic links!");
1301                 return WIMLIB_ERR_UNSUPPORTED;
1302         }
1303         if (required_features->reparse_points &&
1304             !supported_features->reparse_points)
1305         {
1306                 if (supported_features->symlink_reparse_points) {
1307                         if (required_features->other_reparse_points) {
1308                                 WARNING("Ignoring reparse data of %lu non-symlink/junction files",
1309                                         required_features->other_reparse_points);
1310                         }
1311                 } else {
1312                         WARNING("Ignoring reparse data of %lu files",
1313                                 required_features->reparse_points);
1314                 }
1315         }
1316
1317         /* Security descriptors.  */
1318         if (((extract_flags & (WIMLIB_EXTRACT_FLAG_STRICT_ACLS |
1319                                WIMLIB_EXTRACT_FLAG_UNIX_DATA))
1320              == WIMLIB_EXTRACT_FLAG_STRICT_ACLS) &&
1321             required_features->security_descriptors &&
1322             !supported_features->security_descriptors)
1323         {
1324                 ERROR("Extraction backend does not support security descriptors!");
1325                 return WIMLIB_ERR_UNSUPPORTED;
1326         }
1327         if (!(extract_flags & WIMLIB_EXTRACT_FLAG_NO_ACLS) &&
1328             required_features->security_descriptors &&
1329             !supported_features->security_descriptors)
1330                 WARNING("Ignoring Windows NT security descriptors of %lu files",
1331                         required_features->security_descriptors);
1332
1333         /* UNIX data.  */
1334         if ((extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) &&
1335             required_features->unix_data && !supported_features->unix_data)
1336         {
1337                 ERROR("Extraction backend does not support UNIX data!");
1338                 return WIMLIB_ERR_UNSUPPORTED;
1339         }
1340
1341         if (required_features->unix_data &&
1342             !(extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA))
1343         {
1344                 WARNING("Ignoring UNIX metadata of %lu files",
1345                         required_features->unix_data);
1346         }
1347
1348         /* DOS Names.  */
1349         if (required_features->short_names &&
1350             !supported_features->short_names)
1351         {
1352                 if (extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_SHORT_NAMES) {
1353                         ERROR("Extraction backend does not support DOS names!");
1354                         return WIMLIB_ERR_UNSUPPORTED;
1355                 }
1356                 WARNING("Ignoring DOS names of %lu files",
1357                         required_features->short_names);
1358         }
1359
1360         /* Timestamps.  */
1361         if ((extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_TIMESTAMPS) &&
1362             !supported_features->timestamps)
1363         {
1364                 ERROR("Extraction backend does not support timestamps!");
1365                 return WIMLIB_ERR_UNSUPPORTED;
1366         }
1367
1368         return 0;
1369 }
1370
1371 static const struct apply_operations *
1372 select_apply_operations(int extract_flags)
1373 {
1374 #ifdef WITH_NTFS_3G
1375         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS)
1376                 return &ntfs_3g_apply_ops;
1377 #endif
1378 #ifdef __WIN32__
1379         return &win32_apply_ops;
1380 #else
1381         return &unix_apply_ops;
1382 #endif
1383 }
1384
1385 static int
1386 extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees,
1387               const tchar *target, int extract_flags)
1388 {
1389         const struct apply_operations *ops;
1390         struct apply_ctx *ctx;
1391         int ret;
1392         LIST_HEAD(dentry_list);
1393
1394         if (extract_flags & WIMLIB_EXTRACT_FLAG_TO_STDOUT) {
1395                 ret = extract_dentries_to_stdout(trees, num_trees,
1396                                                  wim->blob_table);
1397                 goto out;
1398         }
1399
1400         num_trees = remove_duplicate_trees(trees, num_trees);
1401         num_trees = remove_contained_trees(trees, num_trees);
1402
1403         ops = select_apply_operations(extract_flags);
1404
1405         if (num_trees > 1 && ops->single_tree_only) {
1406                 ERROR("Extracting multiple directory trees "
1407                       "at once is not supported in %s extraction mode!",
1408                       ops->name);
1409                 ret = WIMLIB_ERR_UNSUPPORTED;
1410                 goto out;
1411         }
1412
1413         ctx = CALLOC(1, ops->context_size);
1414         if (!ctx) {
1415                 ret = WIMLIB_ERR_NOMEM;
1416                 goto out;
1417         }
1418
1419         ctx->wim = wim;
1420         ctx->target = target;
1421         ctx->target_nchars = tstrlen(target);
1422         ctx->extract_flags = extract_flags;
1423         if (ctx->wim->progfunc) {
1424                 ctx->progfunc = ctx->wim->progfunc;
1425                 ctx->progctx = ctx->wim->progctx;
1426                 ctx->progress.extract.image = wim->current_image;
1427                 ctx->progress.extract.extract_flags = (extract_flags &
1428                                                        WIMLIB_EXTRACT_MASK_PUBLIC);
1429                 ctx->progress.extract.wimfile_name = wim->filename;
1430                 ctx->progress.extract.image_name = wimlib_get_image_name(wim,
1431                                                                          wim->current_image);
1432                 ctx->progress.extract.target = target;
1433         }
1434         INIT_LIST_HEAD(&ctx->blob_list);
1435         filedes_invalidate(&ctx->tmpfile_fd);
1436         ctx->apply_ops = ops;
1437
1438         ret = (*ops->get_supported_features)(target, &ctx->supported_features);
1439         if (ret)
1440                 goto out_cleanup;
1441
1442         build_dentry_list(&dentry_list, trees, num_trees,
1443                           !(extract_flags &
1444                             WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE));
1445
1446         dentry_list_get_features(&dentry_list, &ctx->required_features);
1447
1448         ret = do_feature_check(&ctx->required_features, &ctx->supported_features,
1449                                ctx->extract_flags);
1450         if (ret)
1451                 goto out_cleanup;
1452
1453         ret = dentry_list_calculate_extraction_names(&dentry_list, ctx);
1454         if (ret)
1455                 goto out_cleanup;
1456
1457         if (unlikely(list_empty(&dentry_list))) {
1458                 WARNING("There is nothing to extract!");
1459                 goto out_cleanup;
1460         }
1461
1462         ret = dentry_list_resolve_streams(&dentry_list, ctx);
1463         if (ret)
1464                 goto out_cleanup;
1465
1466         dentry_list_build_inode_alias_lists(&dentry_list);
1467
1468         ret = dentry_list_ref_streams(&dentry_list, ctx);
1469         if (ret)
1470                 goto out_cleanup;
1471
1472         if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
1473                 /* When extracting from a pipe, the number of bytes of data to
1474                  * extract can't be determined in the normal way (examining the
1475                  * blob table), since at this point all we have is a set of
1476                  * SHA-1 message digests of blobs that need to be extracted.
1477                  * However, we can get a reasonably accurate estimate by taking
1478                  * <TOTALBYTES> from the corresponding <IMAGE> in the WIM XML
1479                  * data.  This does assume that a full image is being extracted,
1480                  * but currently there is no API for doing otherwise.  (Also,
1481                  * subtract <HARDLINKBYTES> from this if hard links are
1482                  * supported by the extraction mode.)  */
1483                 ctx->progress.extract.total_bytes =
1484                         wim_info_get_image_total_bytes(wim->wim_info,
1485                                                        wim->current_image);
1486                 if (ctx->supported_features.hard_links) {
1487                         ctx->progress.extract.total_bytes -=
1488                                 wim_info_get_image_hard_link_bytes(wim->wim_info,
1489                                                                    wim->current_image);
1490                 }
1491         }
1492
1493         ret = extract_progress(ctx,
1494                                ((extract_flags & WIMLIB_EXTRACT_FLAG_IMAGEMODE) ?
1495                                        WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN :
1496                                        WIMLIB_PROGRESS_MSG_EXTRACT_TREE_BEGIN));
1497         if (ret)
1498                 goto out_cleanup;
1499
1500         ret = (*ops->extract)(&dentry_list, ctx);
1501         if (ret)
1502                 goto out_cleanup;
1503
1504         if (ctx->progress.extract.completed_bytes <
1505             ctx->progress.extract.total_bytes)
1506         {
1507                 ctx->progress.extract.completed_bytes =
1508                         ctx->progress.extract.total_bytes;
1509                 ret = extract_progress(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS);
1510                 if (ret)
1511                         goto out_cleanup;
1512         }
1513
1514         ret = extract_progress(ctx,
1515                                ((extract_flags & WIMLIB_EXTRACT_FLAG_IMAGEMODE) ?
1516                                        WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END :
1517                                        WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END));
1518 out_cleanup:
1519         destroy_blob_list(&ctx->blob_list);
1520         destroy_dentry_list(&dentry_list);
1521         FREE(ctx);
1522 out:
1523         return ret;
1524 }
1525
1526 static int
1527 mkdir_if_needed(const tchar *target)
1528 {
1529         if (!tmkdir(target, 0755))
1530                 return 0;
1531
1532         if (errno == EEXIST)
1533                 return 0;
1534
1535 #ifdef __WIN32__
1536         /* _wmkdir() fails with EACCES if called on a drive root directory.  */
1537         if (errno == EACCES)
1538                 return 0;
1539 #endif
1540
1541         ERROR_WITH_ERRNO("Failed to create directory \"%"TS"\"", target);
1542         return WIMLIB_ERR_MKDIR;
1543 }
1544
1545 /* Make sure the extraction flags make sense, and update them if needed.  */
1546 static int
1547 check_extract_flags(const WIMStruct *wim, int *extract_flags_p)
1548 {
1549         int extract_flags = *extract_flags_p;
1550
1551         /* Check for invalid flag combinations  */
1552
1553         if ((extract_flags &
1554              (WIMLIB_EXTRACT_FLAG_NO_ACLS |
1555               WIMLIB_EXTRACT_FLAG_STRICT_ACLS)) == (WIMLIB_EXTRACT_FLAG_NO_ACLS |
1556                                                     WIMLIB_EXTRACT_FLAG_STRICT_ACLS))
1557                 return WIMLIB_ERR_INVALID_PARAM;
1558
1559         if ((extract_flags &
1560              (WIMLIB_EXTRACT_FLAG_RPFIX |
1561               WIMLIB_EXTRACT_FLAG_NORPFIX)) == (WIMLIB_EXTRACT_FLAG_RPFIX |
1562                                                 WIMLIB_EXTRACT_FLAG_NORPFIX))
1563                 return WIMLIB_ERR_INVALID_PARAM;
1564
1565 #ifndef WITH_NTFS_3G
1566         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
1567                 ERROR("wimlib was compiled without support for NTFS-3g, so\n"
1568                       "        it cannot apply a WIM image directly to an NTFS volume.");
1569                 return WIMLIB_ERR_UNSUPPORTED;
1570         }
1571 #endif
1572
1573         if (extract_flags & WIMLIB_EXTRACT_FLAG_WIMBOOT) {
1574 #ifdef __WIN32__
1575                 if (!wim->filename)
1576                         return WIMLIB_ERR_NO_FILENAME;
1577 #else
1578                 ERROR("WIMBoot extraction is only supported on Windows!");
1579                 return WIMLIB_ERR_UNSUPPORTED;
1580 #endif
1581         }
1582
1583
1584         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
1585                               WIMLIB_EXTRACT_FLAG_NORPFIX |
1586                               WIMLIB_EXTRACT_FLAG_IMAGEMODE)) ==
1587                                         WIMLIB_EXTRACT_FLAG_IMAGEMODE)
1588         {
1589                 /* For full-image extraction, do reparse point fixups by default
1590                  * if the WIM header says they are enabled.  */
1591                 if (wim->hdr.flags & WIM_HDR_FLAG_RP_FIX)
1592                         extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX;
1593         }
1594
1595         *extract_flags_p = extract_flags;
1596         return 0;
1597 }
1598
1599 static u32
1600 get_wildcard_flags(int extract_flags)
1601 {
1602         u32 wildcard_flags = 0;
1603
1604         if (extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_GLOB)
1605                 wildcard_flags |= WILDCARD_FLAG_ERROR_IF_NO_MATCH;
1606         else
1607                 wildcard_flags |= WILDCARD_FLAG_WARN_IF_NO_MATCH;
1608
1609         if (default_ignore_case)
1610                 wildcard_flags |= WILDCARD_FLAG_CASE_INSENSITIVE;
1611
1612         return wildcard_flags;
1613 }
1614
/* Growable array of dentry pointers; append_dentry_cb() adds entries to it and
 * enlarges the allocation as needed.  */
struct append_dentry_ctx {
        /* Array of collected dentries, (re)allocated with REALLOC().  */
        struct wim_dentry **dentries;

        /* Number of entries of @dentries currently in use.  */
        size_t num_dentries;

        /* Number of entries @dentries has room for.  */
        size_t num_alloc_dentries;
};
1620
1621 static int
1622 append_dentry_cb(struct wim_dentry *dentry, void *_ctx)
1623 {
1624         struct append_dentry_ctx *ctx = _ctx;
1625
1626         if (ctx->num_dentries == ctx->num_alloc_dentries) {
1627                 struct wim_dentry **new_dentries;
1628                 size_t new_length;
1629
1630                 new_length = max(ctx->num_alloc_dentries + 8,
1631                                  ctx->num_alloc_dentries * 3 / 2);
1632                 new_dentries = REALLOC(ctx->dentries,
1633                                        new_length * sizeof(ctx->dentries[0]));
1634                 if (new_dentries == NULL)
1635                         return WIMLIB_ERR_NOMEM;
1636                 ctx->dentries = new_dentries;
1637                 ctx->num_alloc_dentries = new_length;
1638         }
1639         ctx->dentries[ctx->num_dentries++] = dentry;
1640         return 0;
1641 }
1642
1643 static int
1644 do_wimlib_extract_paths(WIMStruct *wim, int image, const tchar *target,
1645                         const tchar * const *paths, size_t num_paths,
1646                         int extract_flags)
1647 {
1648         int ret;
1649         struct wim_dentry **trees;
1650         size_t num_trees;
1651
1652         if (wim == NULL || target == NULL || target[0] == T('\0') ||
1653             (num_paths != 0 && paths == NULL))
1654                 return WIMLIB_ERR_INVALID_PARAM;
1655
1656         ret = check_extract_flags(wim, &extract_flags);
1657         if (ret)
1658                 return ret;
1659
1660         ret = select_wim_image(wim, image);
1661         if (ret)
1662                 return ret;
1663
1664         ret = wim_checksum_unhashed_blobs(wim);
1665         if (ret)
1666                 return ret;
1667
1668         if ((extract_flags & (WIMLIB_EXTRACT_FLAG_NTFS |
1669                               WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE)) ==
1670             (WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE))
1671         {
1672                 ret = mkdir_if_needed(target);
1673                 if (ret)
1674                         return ret;
1675         }
1676
1677         if (extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS) {
1678
1679                 struct append_dentry_ctx append_dentry_ctx = {
1680                         .dentries = NULL,
1681                         .num_dentries = 0,
1682                         .num_alloc_dentries = 0,
1683                 };
1684
1685                 u32 wildcard_flags = get_wildcard_flags(extract_flags);
1686
1687                 for (size_t i = 0; i < num_paths; i++) {
1688                         tchar *path = canonicalize_wim_path(paths[i]);
1689                         if (path == NULL) {
1690                                 ret = WIMLIB_ERR_NOMEM;
1691                                 trees = append_dentry_ctx.dentries;
1692                                 goto out_free_trees;
1693                         }
1694                         ret = expand_wildcard(wim, path,
1695                                               append_dentry_cb,
1696                                               &append_dentry_ctx,
1697                                               wildcard_flags);
1698                         FREE(path);
1699                         if (ret) {
1700                                 trees = append_dentry_ctx.dentries;
1701                                 goto out_free_trees;
1702                         }
1703                 }
1704                 trees = append_dentry_ctx.dentries;
1705                 num_trees = append_dentry_ctx.num_dentries;
1706         } else {
1707                 trees = MALLOC(num_paths * sizeof(trees[0]));
1708                 if (trees == NULL)
1709                         return WIMLIB_ERR_NOMEM;
1710
1711                 for (size_t i = 0; i < num_paths; i++) {
1712
1713                         tchar *path = canonicalize_wim_path(paths[i]);
1714                         if (path == NULL) {
1715                                 ret = WIMLIB_ERR_NOMEM;
1716                                 goto out_free_trees;
1717                         }
1718
1719                         trees[i] = get_dentry(wim, path,
1720                                               WIMLIB_CASE_PLATFORM_DEFAULT);
1721                         FREE(path);
1722                         if (trees[i] == NULL) {
1723                                   ERROR("Path \"%"TS"\" does not exist "
1724                                         "in WIM image %d",
1725                                         paths[i], wim->current_image);
1726                                   ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
1727                                   goto out_free_trees;
1728                         }
1729                 }
1730                 num_trees = num_paths;
1731         }
1732
1733         if (num_trees == 0) {
1734                 ret = 0;
1735                 goto out_free_trees;
1736         }
1737
1738         ret = extract_trees(wim, trees, num_trees, target, extract_flags);
1739 out_free_trees:
1740         FREE(trees);
1741         return ret;
1742 }
1743
1744 static int
1745 extract_single_image(WIMStruct *wim, int image,
1746                      const tchar *target, int extract_flags)
1747 {
1748         const tchar *path = WIMLIB_WIM_ROOT_PATH;
1749         extract_flags |= WIMLIB_EXTRACT_FLAG_IMAGEMODE;
1750         return do_wimlib_extract_paths(wim, image, target, &path, 1, extract_flags);
1751 }
1752
1753 static const tchar * const filename_forbidden_chars =
1754 T(
1755 #ifdef __WIN32__
1756 "<>:\"/\\|?*"
1757 #else
1758 "/"
1759 #endif
1760 );
1761
1762 /* This function checks if it is okay to use a WIM image's name as a directory
1763  * name.  */
1764 static bool
1765 image_name_ok_as_dir(const tchar *image_name)
1766 {
1767         return image_name && *image_name &&
1768                 !tstrpbrk(image_name, filename_forbidden_chars) &&
1769                 tstrcmp(image_name, T(".")) &&
1770                 tstrcmp(image_name, T(".."));
1771 }
1772
1773 /* Extracts all images from the WIM to the directory @target, with the images
1774  * placed in subdirectories named by their image names. */
1775 static int
1776 extract_all_images(WIMStruct *wim, const tchar *target, int extract_flags)
1777 {
1778         size_t image_name_max_len = max(xml_get_max_image_name_len(wim), 20);
1779         size_t output_path_len = tstrlen(target);
1780         tchar buf[output_path_len + 1 + image_name_max_len + 1];
1781         int ret;
1782         int image;
1783         const tchar *image_name;
1784
1785         if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
1786                 ERROR("Cannot extract multiple images in NTFS extraction mode.");
1787                 return WIMLIB_ERR_INVALID_PARAM;
1788         }
1789
1790         ret = mkdir_if_needed(target);
1791         if (ret)
1792                 return ret;
1793         tmemcpy(buf, target, output_path_len);
1794         buf[output_path_len] = OS_PREFERRED_PATH_SEPARATOR;
1795         for (image = 1; image <= wim->hdr.image_count; image++) {
1796                 image_name = wimlib_get_image_name(wim, image);
1797                 if (image_name_ok_as_dir(image_name)) {
1798                         tstrcpy(buf + output_path_len + 1, image_name);
1799                 } else {
1800                         /* Image name is empty or contains forbidden characters.
1801                          * Use image number instead. */
1802                         tsprintf(buf + output_path_len + 1, T("%d"), image);
1803                 }
1804                 ret = extract_single_image(wim, image, buf, extract_flags);
1805                 if (ret)
1806                         return ret;
1807         }
1808         return 0;
1809 }
1810
1811 static int
1812 do_wimlib_extract_image(WIMStruct *wim, int image, const tchar *target,
1813                         int extract_flags)
1814 {
1815         if (extract_flags & (WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE |
1816                              WIMLIB_EXTRACT_FLAG_TO_STDOUT |
1817                              WIMLIB_EXTRACT_FLAG_GLOB_PATHS))
1818                 return WIMLIB_ERR_INVALID_PARAM;
1819
1820         if (image == WIMLIB_ALL_IMAGES)
1821                 return extract_all_images(wim, target, extract_flags);
1822         else
1823                 return extract_single_image(wim, image, target, extract_flags);
1824 }
1825
1826
1827 /****************************************************************************
1828  *                          Extraction API                                  *
1829  ****************************************************************************/
1830
1831 WIMLIBAPI int
1832 wimlib_extract_paths(WIMStruct *wim, int image, const tchar *target,
1833                      const tchar * const *paths, size_t num_paths,
1834                      int extract_flags)
1835 {
1836         if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC)
1837                 return WIMLIB_ERR_INVALID_PARAM;
1838
1839         return do_wimlib_extract_paths(wim, image, target, paths, num_paths,
1840                                        extract_flags);
1841 }
1842
1843 WIMLIBAPI int
1844 wimlib_extract_pathlist(WIMStruct *wim, int image, const tchar *target,
1845                         const tchar *path_list_file, int extract_flags)
1846 {
1847         int ret;
1848         tchar **paths;
1849         size_t num_paths;
1850         void *mem;
1851
1852         ret = read_path_list_file(path_list_file, &paths, &num_paths, &mem);
1853         if (ret) {
1854                 ERROR("Failed to read path list file \"%"TS"\"",
1855                       path_list_file);
1856                 return ret;
1857         }
1858
1859         ret = wimlib_extract_paths(wim, image, target,
1860                                    (const tchar * const *)paths, num_paths,
1861                                    extract_flags);
1862         FREE(paths);
1863         FREE(mem);
1864         return ret;
1865 }
1866
/*
 * Extract an image from a pipable WIM that is read sequentially from the file
 * descriptor @pipe_fd.
 *
 * @pipe_fd
 *      File descriptor the pipable WIM is read from.
 * @image_num_or_name
 *      Image to extract, resolved with wimlib_resolve_image().  May be NULL,
 *      in which case the WIM must contain exactly one image.  "All images" is
 *      not accepted.
 * @target
 *      Extraction target; passed on unchanged to do_wimlib_extract_image().
 * @extract_flags
 *      Must only contain flags in WIMLIB_EXTRACT_MASK_PUBLIC.
 *      WIMLIB_EXTRACT_FLAG_FROM_PIPE is added internally.
 * @progfunc, @progctx
 *      Progress function and its context; passed to open_wim_as_WIMStruct().
 *
 * Returns 0 on success or a WIMLIB_ERR_* code on failure.  The temporary
 * WIMStruct created for the pipe is released with wimlib_free() in all cases
 * once it has been opened.
 */
WIMLIBAPI int
wimlib_extract_image_from_pipe_with_progress(int pipe_fd,
                                             const tchar *image_num_or_name,
                                             const tchar *target,
                                             int extract_flags,
                                             wimlib_progress_func_t progfunc,
                                             void *progctx)
{
        int ret;
        WIMStruct *pwm;
        struct filedes *in_fd;
        int image;
        unsigned i;

        /* Reject internal-only flags before touching the pipe.  */
        if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC)
                return WIMLIB_ERR_INVALID_PARAM;

        /* Read the WIM header from the pipe and get a WIMStruct to represent
         * the pipable WIM.  Caveats:  Unlike getting a WIMStruct with
         * wimlib_open_wim(), getting a WIMStruct in this way will result in an
         * empty blob table, no XML data read, and no filename set.  */
        ret = open_wim_as_WIMStruct(&pipe_fd, WIMLIB_OPEN_FLAG_FROM_PIPE, &pwm,
                                    progfunc, progctx);
        if (ret)
                return ret;

        /* Sanity check to make sure this is a pipable WIM.  */
        if (pwm->hdr.magic != PWM_MAGIC) {
                ERROR("The WIM being read from file descriptor %d "
                      "is not pipable!", pipe_fd);
                ret = WIMLIB_ERR_NOT_PIPABLE;
                goto out_wimlib_free;
        }

        /* Sanity check to make sure the first part of a pipable split WIM is
         * sent over the pipe first.  */
        if (pwm->hdr.part_number != 1) {
                ERROR("The first part of the split WIM must be "
                      "sent over the pipe first.");
                ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
                goto out_wimlib_free;
        }

        /* Only the header should have been consumed from the pipe so far.  */
        in_fd = &pwm->in_fd;
        wimlib_assert(in_fd->offset == WIM_HEADER_DISK_SIZE);

        /* As mentioned, the WIMStruct we created from the pipe does not have
         * XML data yet.  Fix this by reading the extra copy of the XML data
         * that directly follows the header in pipable WIMs.  (Note: see
         * write_pipable_wim() for more details about the format of pipable
         * WIMs.)  */
        {
                /* Both descriptors are temporaries on the stack; the resource
                 * location is copied into the WIM header below.  */
                struct blob_descriptor xml_blob;
                struct wim_resource_descriptor xml_rdesc;
                ret = read_pwm_blob_header(pwm, &xml_blob, &xml_rdesc, 0, NULL);
                if (ret)
                        goto out_wimlib_free;

                if (!xml_blob.is_metadata) {
                        ERROR("Expected XML data, but found non-metadata resource.");
                        ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
                        goto out_wimlib_free;
                }

                /* Record where the XML data is so that read_wim_xml_data() can
                 * find it.  */
                wim_res_desc_to_hdr(&xml_rdesc, &pwm->hdr.xml_data_reshdr);

                ret = read_wim_xml_data(pwm);
                if (ret)
                        goto out_wimlib_free;

                if (wim_info_get_num_images(pwm->wim_info) != pwm->hdr.image_count) {
                        ERROR("Image count in XML data is not the same as in WIM header.");
                        ret = WIMLIB_ERR_IMAGE_COUNT;
                        goto out_wimlib_free;
                }
        }

        /* Get image index (this may use the XML data that was just read to
         * resolve an image name).  */
        if (image_num_or_name) {
                image = wimlib_resolve_image(pwm, image_num_or_name);
                if (image == WIMLIB_NO_IMAGE) {
                        ERROR("\"%"TS"\" is not a valid image in the pipable WIM!",
                              image_num_or_name);
                        ret = WIMLIB_ERR_INVALID_IMAGE;
                        goto out_wimlib_free;
                } else if (image == WIMLIB_ALL_IMAGES) {
                        ERROR("Applying all images from a pipe is not supported!");
                        ret = WIMLIB_ERR_INVALID_IMAGE;
                        goto out_wimlib_free;
                }
        } else {
                /* No image given: only unambiguous if there is exactly one.  */
                if (pwm->hdr.image_count != 1) {
                        ERROR("No image was specified, but the pipable WIM "
                              "did not contain exactly 1 image");
                        ret = WIMLIB_ERR_INVALID_IMAGE;
                        goto out_wimlib_free;
                }
                image = 1;
        }

        /* Load the needed metadata resource.  A blob header is read for every
         * image in order; only the metadata resource of the selected image is
         * parsed, and the others are skipped over.  */
        for (i = 1; i <= pwm->hdr.image_count; i++) {
                struct blob_descriptor *metadata_blob;
                struct wim_image_metadata *imd;
                struct wim_resource_descriptor *metadata_rdesc;

                metadata_blob = new_blob_descriptor();
                if (metadata_blob == NULL) {
                        ret = WIMLIB_ERR_NOMEM;
                        goto out_wimlib_free;
                }
                metadata_rdesc = MALLOC(sizeof(struct wim_resource_descriptor));
                if (metadata_rdesc == NULL) {
                        ret = WIMLIB_ERR_NOMEM;
                        free_blob_descriptor(metadata_blob);
                        goto out_wimlib_free;
                }

                ret = read_pwm_blob_header(pwm, metadata_blob, metadata_rdesc, 0, NULL);
                /* The blob is attached to the image metadata before @ret is
                 * checked -- presumably so that wimlib_free() releases it on
                 * the error paths below (TODO confirm).  */
                imd = pwm->image_metadata[i - 1];
                imd->metadata_blob = metadata_blob;
                if (ret) {
                        FREE(metadata_rdesc);
                        goto out_wimlib_free;
                }
                /* NOTE(review): on success metadata_rdesc is not freed in this
                 * function; presumably read_pwm_blob_header() ties it to
                 * metadata_blob, which then owns it -- confirm.  */

                if (!metadata_blob->is_metadata) {
                        ERROR("Expected metadata resource, but found "
                              "non-metadata resource.");
                        ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
                        goto out_wimlib_free;
                }

                if (i == image) {
                        /* Metadata resource is for the image being extracted.
                         * Parse it and save the metadata in memory.  */
                        ret = read_metadata_resource(imd);
                        if (ret)
                                goto out_wimlib_free;
                        imd->modified = 1;
                } else {
                        /* Metadata resource is not for the image being
                         * extracted.  Skip over it.  */
                        ret = skip_wim_resource(metadata_rdesc);
                        if (ret)
                                goto out_wimlib_free;
                }
        }
        /* Extract the image.  */
        extract_flags |= WIMLIB_EXTRACT_FLAG_FROM_PIPE;
        ret = do_wimlib_extract_image(pwm, image, target, extract_flags);
        /* Clean up and return.  */
out_wimlib_free:
        wimlib_free(pwm);
        return ret;
}
2024
2025
2026 WIMLIBAPI int
2027 wimlib_extract_image_from_pipe(int pipe_fd, const tchar *image_num_or_name,
2028                                const tchar *target, int extract_flags)
2029 {
2030         return wimlib_extract_image_from_pipe_with_progress(pipe_fd,
2031                                                             image_num_or_name,
2032                                                             target,
2033                                                             extract_flags,
2034                                                             NULL,
2035                                                             NULL);
2036 }
2037
2038 WIMLIBAPI int
2039 wimlib_extract_image(WIMStruct *wim, int image, const tchar *target,
2040                      int extract_flags)
2041 {
2042         if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC)
2043                 return WIMLIB_ERR_INVALID_PARAM;
2044         return do_wimlib_extract_image(wim, image, target, extract_flags);
2045 }