Windows native build
[wimlib] / src / add_image.c
1 /*
2  * add_image.c
3  */
4
5 /*
6  * Copyright (C) 2012, 2013 Eric Biggers
7  *
8  * This file is part of wimlib, a library for working with WIM files.
9  *
10  * wimlib is free software; you can redistribute it and/or modify it under the
11  * terms of the GNU General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option)
13  * any later version.
14  *
15  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17  * A PARTICULAR PURPOSE. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with wimlib; if not, see http://www.gnu.org/licenses/.
22  */
23
24 #include "config.h"
25
26 #if defined(__CYGWIN__) || defined(__WIN32__)
27 #       include <windows.h>
28 #       include <ntdef.h>
29 #       include <wchar.h>
30 #       ifdef ERROR
31 #               undef ERROR
32 #       endif
33 #       include "security.h"
34 #else
35 #       include <dirent.h>
36 #       include <sys/stat.h>
37 #       include "timestamp.h"
38 #endif
39
40 #ifdef __WIN32__
41 #include <shlwapi.h>
42 #endif
43
44 #include "wimlib_internal.h"
45 #include "dentry.h"
46 #include "lookup_table.h"
47 #include "xml.h"
48 #include <ctype.h>
49 #include <errno.h>
50
51 #ifndef __WIN32__
52 #include <fnmatch.h>
53 #endif
54
55 #include <stdlib.h>
56 #include <string.h>
57 #include <unistd.h>
58
59 #ifdef HAVE_ALLOCA_H
60 #include <alloca.h>
61 #endif
62
63 #define WIMLIB_ADD_IMAGE_FLAG_ROOT      0x80000000
64 #define WIMLIB_ADD_IMAGE_FLAG_SOURCE    0x40000000
65
66 /*
67  * Adds the dentry tree and security data for a new image to the image metadata
68  * array of the WIMStruct.
69  */
70 int add_new_dentry_tree(WIMStruct *w, struct wim_dentry *root_dentry,
71                         struct wim_security_data *sd)
72 {
73         struct wim_lookup_table_entry *metadata_lte;
74         struct wim_image_metadata *imd;
75         struct wim_image_metadata *new_imd;
76
77         wimlib_assert(root_dentry != NULL);
78
79         DEBUG("Reallocating image metadata array for image_count = %u",
80               w->hdr.image_count + 1);
81         imd = CALLOC((w->hdr.image_count + 1), sizeof(struct wim_image_metadata));
82
83         if (!imd) {
84                 ERROR("Failed to allocate memory for new image metadata array");
85                 goto err;
86         }
87
88         memcpy(imd, w->image_metadata,
89                w->hdr.image_count * sizeof(struct wim_image_metadata));
90
91         metadata_lte = new_lookup_table_entry();
92         if (!metadata_lte)
93                 goto err_free_imd;
94
95         metadata_lte->resource_entry.flags = WIM_RESHDR_FLAG_METADATA;
96         random_hash(metadata_lte->hash);
97         lookup_table_insert(w->lookup_table, metadata_lte);
98
99         new_imd = &imd[w->hdr.image_count];
100
101         new_imd->root_dentry    = root_dentry;
102         new_imd->metadata_lte   = metadata_lte;
103         new_imd->security_data  = sd;
104         new_imd->modified       = 1;
105
106         FREE(w->image_metadata);
107         w->image_metadata = imd;
108         w->hdr.image_count++;
109         return 0;
110 err_free_imd:
111         FREE(imd);
112 err:
113         return WIMLIB_ERR_NOMEM;
114
115 }
116
117 #if defined(__CYGWIN__) || defined(__WIN32__)
118
119 static u64 FILETIME_to_u64(const FILETIME *ft)
120 {
121         return ((u64)ft->dwHighDateTime << 32) | (u64)ft->dwLowDateTime;
122 }
123
124
125 static int build_dentry_tree(struct wim_dentry **root_ret,
126                              const char *root_disk_path,
127                              struct wim_lookup_table *lookup_table,
128                              struct wim_security_data *sd,
129                              const struct capture_config *config,
130                              int add_image_flags,
131                              wimlib_progress_func_t progress_func,
132                              void *extra_arg);
133
134 static int win32_get_short_name(struct wim_dentry *dentry,
135                                 const wchar_t *path_utf16)
136 {
137         WIN32_FIND_DATAW dat;
138         if (FindFirstFileW(path_utf16, &dat) &&
139             dat.cAlternateFileName[0] != L'\0')
140         {
141                 size_t short_name_len = wcslen(dat.cAlternateFileName) * 2;
142                 size_t n = short_name_len + sizeof(wchar_t);
143                 dentry->short_name = MALLOC(n);
144                 if (!dentry->short_name)
145                         return WIMLIB_ERR_NOMEM;
146                 memcpy(dentry->short_name, dat.cAlternateFileName, n);
147                 dentry->short_name_len = short_name_len;
148         }
149         return 0;
150 }
151
152 static int win32_get_security_descriptor(struct wim_dentry *dentry,
153                                          struct sd_set *sd_set,
154                                          const wchar_t *path_utf16)
155 {
156         SECURITY_INFORMATION requestedInformation;
157         DWORD lenNeeded = 0;
158         BOOL status;
159         DWORD err;
160
161         requestedInformation = DACL_SECURITY_INFORMATION |
162                                SACL_SECURITY_INFORMATION |
163                                OWNER_SECURITY_INFORMATION |
164                                GROUP_SECURITY_INFORMATION;
165         /* Request length of security descriptor */
166         status = GetFileSecurityW(path_utf16, requestedInformation,
167                                   NULL, 0, &lenNeeded);
168         err = GetLastError();
169         if (!status && err == ERROR_INSUFFICIENT_BUFFER) {
170                 DWORD len = lenNeeded;
171                 char buf[len];
172                 if (GetFileSecurityW(path_utf16, requestedInformation,
173                                      (PSECURITY_DESCRIPTOR)buf, len, &lenNeeded))
174                 {
175                         int security_id = sd_set_add_sd(sd_set, buf, len);
176                         if (security_id < 0)
177                                 return WIMLIB_ERR_NOMEM;
178                         else {
179                                 dentry->d_inode->i_security_id = security_id;
180                                 return 0;
181                         }
182                 } else {
183                         err = GetLastError();
184                 }
185         }
186         ERROR("Win32 API: Failed to read security descriptor of \"%ls\"",
187               path_utf16);
188         win32_error(err);
189         return WIMLIB_ERR_READ;
190 }
191
192 /* Reads the directory entries of directory using a Win32 API and recursively
193  * calls build_dentry_tree() on them. */
194 static int win32_recurse_directory(struct wim_dentry *root,
195                                    const char *root_disk_path,
196                                    struct wim_lookup_table *lookup_table,
197                                    struct wim_security_data *sd,
198                                    const struct capture_config *config,
199                                    int add_image_flags,
200                                    wimlib_progress_func_t progress_func,
201                                    struct sd_set *sd_set,
202                                    const wchar_t *path_utf16,
203                                    size_t path_utf16_nchars)
204 {
205         WIN32_FIND_DATAW dat;
206         HANDLE hFind;
207         DWORD err;
208         int ret;
209
210         {
211                 /* Begin reading the directory by calling FindFirstFileW.
212                  * Unlike UNIX opendir(), FindFirstFileW has file globbing built
213                  * into it.  But this isn't what we actually want, so just add a
214                  * dummy glob to get all entries. */
215                 wchar_t pattern_buf[path_utf16_nchars + 3];
216                 memcpy(pattern_buf, path_utf16,
217                        path_utf16_nchars * sizeof(wchar_t));
218                 pattern_buf[path_utf16_nchars] = L'/';
219                 pattern_buf[path_utf16_nchars + 1] = L'*';
220                 pattern_buf[path_utf16_nchars + 2] = L'\0';
221                 hFind = FindFirstFileW(pattern_buf, &dat);
222         }
223         if (hFind == INVALID_HANDLE_VALUE) {
224                 err = GetLastError();
225                 if (err == ERROR_FILE_NOT_FOUND) {
226                         return 0;
227                 } else {
228                         ERROR("Win32 API: Failed to read directory \"%s\"",
229                               root_disk_path);
230                         win32_error(err);
231                         return WIMLIB_ERR_READ;
232                 }
233         }
234         ret = 0;
235         do {
236                 /* Skip . and .. entries */
237                 if (!(dat.cFileName[0] == L'.' &&
238                       (dat.cFileName[1] == L'\0' ||
239                        (dat.cFileName[1] == L'.' && dat.cFileName[2] == L'\0'))))
240                 {
241                         struct wim_dentry *child;
242
243                         char *utf8_name;
244                         size_t utf8_name_nbytes;
245                         ret = utf16_to_utf8((const char*)dat.cFileName,
246                                             wcslen(dat.cFileName) * sizeof(wchar_t),
247                                             &utf8_name,
248                                             &utf8_name_nbytes);
249                         if (ret)
250                                 goto out_find_close;
251
252                         char name[strlen(root_disk_path) + 1 + utf8_name_nbytes + 1];
253                         sprintf(name, "%s/%s", root_disk_path, utf8_name);
254                         FREE(utf8_name);
255                         ret = build_dentry_tree(&child, name, lookup_table,
256                                                 sd, config, add_image_flags,
257                                                 progress_func, sd_set);
258                         if (ret)
259                                 goto out_find_close;
260                         if (child)
261                                 dentry_add_child(root, child);
262                 }
263         } while (FindNextFileW(hFind, &dat));
264         err = GetLastError();
265         if (err != ERROR_NO_MORE_FILES) {
266                 ERROR("Win32 API: Failed to read directory \"%s\"", root_disk_path);
267                 win32_error(err);
268                 if (ret == 0)
269                         ret = WIMLIB_ERR_READ;
270         }
271 out_find_close:
272         FindClose(hFind);
273         return ret;
274 }
275
276 /* Load a reparse point into a WIM inode.  It is just stored in memory.
277  *
278  * @hFile:  Open handle to a reparse point, with permission to read the reparse
279  *          data.
280  *
281  * @inode:  WIM inode for the reparse point.
282  *
283  * @lookup_table:  Stream lookup table for the WIM; an entry will be added to it
284  *                 for the reparse point unless an entry already exists for
285  *                 the exact same data stream.
286  *
287  * @path:  External path to the parse point (UTF-8).  Used for error messages
288  *         only.
289  *
290  * Returns 0 on success; nonzero on failure. */
291 static int win32_capture_reparse_point(HANDLE hFile,
292                                        struct wim_inode *inode,
293                                        struct wim_lookup_table *lookup_table,
294                                        const char *path)
295 {
296         /* "Reparse point data, including the tag and optional GUID,
297          * cannot exceed 16 kilobytes." - MSDN  */
298         char reparse_point_buf[16 * 1024];
299         DWORD bytesReturned;
300
301         if (!DeviceIoControl(hFile, FSCTL_GET_REPARSE_POINT,
302                              NULL, 0, reparse_point_buf,
303                              sizeof(reparse_point_buf), &bytesReturned, NULL))
304         {
305                 DWORD err = GetLastError();
306                 ERROR("Win32 API: Failed to get reparse data of \"%s\"", path);
307                 win32_error(err);
308                 return WIMLIB_ERR_READ;
309         }
310         if (bytesReturned < 8) {
311                 ERROR("Reparse data on \"%s\" is invalid", path);
312                 return WIMLIB_ERR_READ;
313         }
314         inode->i_reparse_tag = *(u32*)reparse_point_buf;
315         return inode_add_ads_with_data(inode, "",
316                                        (const u8*)reparse_point_buf + 8,
317                                        bytesReturned - 8, lookup_table);
318 }
319
320 /* Calculate the SHA1 message digest of a Win32 data stream, which may be either
321  * an unnamed or named data stream.
322  *
323  * @path:       Path to the file, with the stream noted at the end for named
324  *              streams.  UTF-16LE encoding.
325  *
326  * @hash:       On success, the SHA1 message digest of the stream is written to
327  *              this location.
328  *
329  * Returns 0 on success; nonzero on failure.
330  */
331 static int win32_sha1sum(const wchar_t *path, u8 hash[SHA1_HASH_SIZE])
332 {
333         HANDLE hFile;
334         SHA_CTX ctx;
335         u8 buf[32768];
336         DWORD bytesRead;
337         int ret;
338
339         hFile = win32_open_file_readonly(path);
340         if (hFile == INVALID_HANDLE_VALUE)
341                 return WIMLIB_ERR_OPEN;
342
343         sha1_init(&ctx);
344         for (;;) {
345                 if (!ReadFile(hFile, buf, sizeof(buf), &bytesRead, NULL)) {
346                         ret = WIMLIB_ERR_READ;
347                         goto out_close_handle;
348                 }
349                 if (bytesRead == 0)
350                         break;
351                 sha1_update(&ctx, buf, bytesRead);
352         }
353         ret = 0;
354         sha1_final(hash, &ctx);
355 out_close_handle:
356         CloseHandle(hFile);
357         return ret;
358 }
359
360 /* Scans an unnamed or named stream of a Win32 file (not a reparse point
361  * stream); calculates its SHA1 message digest and either creates a `struct
362  * wim_lookup_table_entry' in memory for it, or uses an existing 'struct
363  * wim_lookup_table_entry' for an identical stream.
364  *
365  * @path_utf16:         Path to the file (UTF-16LE).
366  *
367  * @path_utf16_nchars:  Number of 2-byte characters in @path_utf16.
368  *
369  * @inode:              WIM inode to save the stream into.
370  *
371  * @lookup_table:       Stream lookup table for the WIM.
372  *
373  * @dat:                A `WIN32_FIND_STREAM_DATA' structure that specifies the
374  *                      stream name.
375  *
376  * Returns 0 on success; nonzero on failure.
377  */
378 static int win32_capture_stream(const wchar_t *path_utf16,
379                                 size_t path_utf16_nchars,
380                                 struct wim_inode *inode,
381                                 struct wim_lookup_table *lookup_table,
382                                 WIN32_FIND_STREAM_DATA *dat)
383 {
384         struct wim_ads_entry *ads_entry;
385         u8 hash[SHA1_HASH_SIZE];
386         struct wim_lookup_table_entry *lte;
387         int ret;
388         wchar_t *p, *colon;
389         bool is_named_stream;
390         wchar_t *spath;
391         size_t spath_nchars;
392         DWORD err;
393
394         /* The stream name should be returned as :NAME:TYPE */
395         p = dat->cStreamName;
396         if (*p != L':')
397                 goto out_invalid_stream_name;
398         p += 1;
399         colon = wcschr(p, L':');
400         if (colon == NULL)
401                 goto out_invalid_stream_name;
402
403         if (wcscmp(colon + 1, L"$DATA")) {
404                 /* Not a DATA stream */
405                 ret = 0;
406                 goto out;
407         }
408
409         is_named_stream = (p != colon);
410         if (is_named_stream) {
411                 /* Allocate an ADS entry for the named stream. */
412                 char *utf8_stream_name;
413                 size_t utf8_stream_name_len;
414                 ret = utf16_to_utf8((const char *)p,
415                                     (colon - p) * sizeof(wchar_t),
416                                     &utf8_stream_name,
417                                     &utf8_stream_name_len);
418                 if (ret)
419                         goto out;
420                 ads_entry = inode_add_ads(inode, utf8_stream_name);
421                 FREE(utf8_stream_name);
422                 if (!ads_entry) {
423                         ret = WIMLIB_ERR_NOMEM;
424                         goto out;
425                 }
426         }
427
428         /* Create a UTF-16 string @spath that gives the filename, then a colon,
429          * then the stream name.  Or, if it's an unnamed stream, just the
430          * filename.  It is MALLOC()'ed so that it can be saved in the
431          * wim_lookup_table_entry if needed. */
432         *colon = '\0';
433         spath_nchars = path_utf16_nchars;
434         if (is_named_stream)
435                 spath_nchars += colon - p + 1;
436
437         spath = MALLOC((spath_nchars + 1) * sizeof(wchar_t));
438         memcpy(spath, path_utf16, path_utf16_nchars * sizeof(wchar_t));
439         if (is_named_stream) {
440                 spath[path_utf16_nchars] = L':';
441                 memcpy(&spath[path_utf16_nchars + 1], p, (colon - p) * sizeof(wchar_t));
442         }
443         spath[spath_nchars] = L'\0';
444
445         ret = win32_sha1sum(spath, hash);
446         if (ret) {
447                 err = GetLastError();
448                 ERROR("Win32 API: Failed to read \"%ls\" to calculate SHA1sum",
449                       path_utf16);
450                 win32_error(err);
451                 goto out_free_spath;
452         }
453
454         lte = __lookup_resource(lookup_table, hash);
455         if (lte) {
456                 /* Use existing wim_lookup_table_entry that has the same SHA1
457                  * message digest */
458                 lte->refcnt++;
459         } else {
460                 /* Make a new wim_lookup_table_entry */
461                 lte = new_lookup_table_entry();
462                 if (!lte) {
463                         ret = WIMLIB_ERR_NOMEM;
464                         goto out_free_spath;
465                 }
466                 lte->file_on_disk = (char*)spath;
467                 spath = NULL;
468                 lte->resource_location = RESOURCE_WIN32;
469                 lte->resource_entry.original_size = (uint64_t)dat->StreamSize.QuadPart;
470                 lte->resource_entry.size = (uint64_t)dat->StreamSize.QuadPart;
471                 copy_hash(lte->hash, hash);
472                 lookup_table_insert(lookup_table, lte);
473         }
474         if (is_named_stream)
475                 ads_entry->lte = lte;
476         else
477                 inode->i_lte = lte;
478 out_free_spath:
479         FREE(spath);
480 out:
481         return ret;
482 out_invalid_stream_name:
483         ERROR("Invalid stream name: \"%ls:%ls\"", path_utf16, dat->cStreamName);
484         ret = WIMLIB_ERR_READ;
485         goto out;
486 }
487
488 /* Scans a Win32 file for unnamed and named data streams (not reparse point
489  * streams).
490  *
491  * @path_utf16:         Path to the file (UTF-16LE).
492  *
493  * @path_utf16_nchars:  Number of 2-byte characters in @path_utf16.
494  *
495  * @inode:              WIM inode to save the stream into.
496  *
497  * @lookup_table:       Stream lookup table for the WIM.
498  *
499  * Returns 0 on success; nonzero on failure.
500  */
501 static int win32_capture_streams(const wchar_t *path_utf16,
502                                  size_t path_utf16_nchars,
503                                  struct wim_inode *inode,
504                                  struct wim_lookup_table *lookup_table)
505 {
506         WIN32_FIND_STREAM_DATA dat;
507         int ret;
508         HANDLE hFind;
509         DWORD err;
510
511         hFind = FindFirstStreamW(path_utf16, FindStreamInfoStandard, &dat, 0);
512         if (hFind == INVALID_HANDLE_VALUE) {
513                 err = GetLastError();
514
515                 /* Seems legal for this to return ERROR_HANDLE_EOF on reparse
516                  * points and directories */
517                 if ((inode->i_attributes &
518                     (FILE_ATTRIBUTE_REPARSE_POINT | FILE_ATTRIBUTE_DIRECTORY))
519                     && err == ERROR_HANDLE_EOF)
520                 {
521                         return 0;
522                 } else {
523                         ERROR("Win32 API: Failed to look up data streams of \"%ls\"",
524                               path_utf16);
525                         win32_error(err);
526                         return WIMLIB_ERR_READ;
527                 }
528         }
529         do {
530                 ret = win32_capture_stream(path_utf16,
531                                            path_utf16_nchars,
532                                            inode, lookup_table,
533                                            &dat);
534                 if (ret)
535                         goto out_find_close;
536         } while (FindNextStreamW(hFind, &dat));
537         err = GetLastError();
538         if (err != ERROR_HANDLE_EOF) {
539                 ERROR("Win32 API: Error reading data streams from \"%ls\"", path_utf16);
540                 win32_error(err);
541                 ret = WIMLIB_ERR_READ;
542         }
543 out_find_close:
544         FindClose(hFind);
545         return ret;
546 }
547
548 #endif
549
550 /*
551  * build_dentry_tree():
552  *      Recursively builds a tree of WIM dentries from an on-disk directory
553  *      tree.
554  *
555  * @root_ret:   Place to return a pointer to the root of the dentry tree.  Only
556  *              modified if successful.  Set to NULL if the file or directory was
557  *              excluded from capture.
558  *
559  * @root_disk_path:  The path to the root of the directory tree on disk (UTF-8).
560  *
561  * @lookup_table: The lookup table for the WIM file.  For each file added to the
562  *              dentry tree being built, an entry is added to the lookup table,
563  *              unless an identical stream is already in the lookup table.
564  *              These lookup table entries that are added point to the path of
565  *              the file on disk.
566  *
567  * @sd:         Ignored.  (Security data only captured in NTFS mode.)
568  *
569  * @capture_config:
570  *              Configuration for files to be excluded from capture.
571  *
572  * @add_flags:  Bitwise or of WIMLIB_ADD_IMAGE_FLAG_*
573  *
574  * @extra_arg:  Ignored in UNIX builds; used to pass sd_set pointer in Windows
575  *              builds.
576  *
577  * @return:     0 on success, nonzero on failure.  It is a failure if any of
578  *              the files cannot be `stat'ed, or if any of the needed
579  *              directories cannot be opened or read.  Failure to add the files
580  *              to the WIM may still occur later when trying to actually read
581  *              the on-disk files during a call to wimlib_write() or
582  *              wimlib_overwrite().
583  */
584 static int build_dentry_tree(struct wim_dentry **root_ret,
585                              const char *root_disk_path,
586                              struct wim_lookup_table *lookup_table,
587                              struct wim_security_data *sd,
588                              const struct capture_config *config,
589                              int add_image_flags,
590                              wimlib_progress_func_t progress_func,
591                              void *extra_arg)
592 {
593         struct wim_dentry *root = NULL;
594         int ret = 0;
595         struct wim_inode *inode;
596
597         if (exclude_path(root_disk_path, config, true)) {
598                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) {
599                         ERROR("Cannot exclude the root directory from capture");
600                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
601                         goto out;
602                 }
603                 if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
604                     && progress_func)
605                 {
606                         union wimlib_progress_info info;
607                         info.scan.cur_path = root_disk_path;
608                         info.scan.excluded = true;
609                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
610                 }
611                 goto out;
612         }
613
614         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
615             && progress_func)
616         {
617                 union wimlib_progress_info info;
618                 info.scan.cur_path = root_disk_path;
619                 info.scan.excluded = false;
620                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
621         }
622
623 #if !defined(__CYGWIN__) && !defined(__WIN32__)
624         /* UNIX version of capturing a directory tree */
625         struct stat root_stbuf;
626         int (*stat_fn)(const char *restrict, struct stat *restrict);
627         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE)
628                 stat_fn = stat;
629         else
630                 stat_fn = lstat;
631
632         ret = (*stat_fn)(root_disk_path, &root_stbuf);
633         if (ret != 0) {
634                 ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
635                 goto out;
636         }
637
638         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) &&
639               !S_ISDIR(root_stbuf.st_mode))
640         {
641                 /* Do a dereference-stat in case the root is a symbolic link.
642                  * This case is allowed, provided that the symbolic link points
643                  * to a directory. */
644                 ret = stat(root_disk_path, &root_stbuf);
645                 if (ret != 0) {
646                         ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
647                         ret = WIMLIB_ERR_STAT;
648                         goto out;
649                 }
650                 if (!S_ISDIR(root_stbuf.st_mode)) {
651                         ERROR("`%s' is not a directory", root_disk_path);
652                         ret = WIMLIB_ERR_NOTDIR;
653                         goto out;
654                 }
655         }
656         if (!S_ISREG(root_stbuf.st_mode) && !S_ISDIR(root_stbuf.st_mode)
657             && !S_ISLNK(root_stbuf.st_mode)) {
658                 ERROR("`%s' is not a regular file, directory, or symbolic link.",
659                       root_disk_path);
660                 ret = WIMLIB_ERR_SPECIAL_FILE;
661                 goto out;
662         }
663
664         root = new_dentry_with_timeless_inode(path_basename(root_disk_path));
665         if (!root) {
666                 if (errno == EILSEQ)
667                         ret = WIMLIB_ERR_INVALID_UTF8_STRING;
668                 else if (errno == ENOMEM)
669                         ret = WIMLIB_ERR_NOMEM;
670                 else
671                         ret = WIMLIB_ERR_ICONV_NOT_AVAILABLE;
672                 goto out;
673         }
674
675         inode = root->d_inode;
676
677 #ifdef HAVE_STAT_NANOSECOND_PRECISION
678         inode->i_creation_time = timespec_to_wim_timestamp(&root_stbuf.st_mtim);
679         inode->i_last_write_time = timespec_to_wim_timestamp(&root_stbuf.st_mtim);
680         inode->i_last_access_time = timespec_to_wim_timestamp(&root_stbuf.st_atim);
681 #else
682         inode->i_creation_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
683         inode->i_last_write_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
684         inode->i_last_access_time = unix_timestamp_to_wim(root_stbuf.st_atime);
685 #endif
686         /* Leave the inode number at 0 for directories. */
687         if (!S_ISDIR(root_stbuf.st_mode)) {
688                 if (sizeof(ino_t) >= 8)
689                         inode->i_ino = (u64)root_stbuf.st_ino;
690                 else
691                         inode->i_ino = (u64)root_stbuf.st_ino |
692                                            ((u64)root_stbuf.st_dev <<
693                                                 ((sizeof(ino_t) * 8) & 63));
694         }
695         inode->i_resolved = 1;
696         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
697                 ret = inode_set_unix_data(inode, root_stbuf.st_uid,
698                                           root_stbuf.st_gid,
699                                           root_stbuf.st_mode,
700                                           lookup_table,
701                                           UNIX_DATA_ALL | UNIX_DATA_CREATE);
702                 if (ret)
703                         goto out;
704         }
705         add_image_flags &= ~(WIMLIB_ADD_IMAGE_FLAG_ROOT | WIMLIB_ADD_IMAGE_FLAG_SOURCE);
706         if (S_ISREG(root_stbuf.st_mode)) { /* Archiving a regular file */
707
708                 struct wim_lookup_table_entry *lte;
709                 u8 hash[SHA1_HASH_SIZE];
710
711                 inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
712
713                 /* Empty files do not have to have a lookup table entry. */
714                 if (root_stbuf.st_size == 0)
715                         goto out;
716
717                 /* For each regular file, we must check to see if the file is in
718                  * the lookup table already; if it is, we increment its refcnt;
719                  * otherwise, we create a new lookup table entry and insert it.
720                  * */
721
722                 ret = sha1sum(root_disk_path, hash);
723                 if (ret != 0)
724                         goto out;
725
726                 lte = __lookup_resource(lookup_table, hash);
727                 if (lte) {
728                         lte->refcnt++;
729                         DEBUG("Add lte reference %u for `%s'", lte->refcnt,
730                               root_disk_path);
731                 } else {
732                         char *file_on_disk = STRDUP(root_disk_path);
733                         if (!file_on_disk) {
734                                 ERROR("Failed to allocate memory for file path");
735                                 ret = WIMLIB_ERR_NOMEM;
736                                 goto out;
737                         }
738                         lte = new_lookup_table_entry();
739                         if (!lte) {
740                                 FREE(file_on_disk);
741                                 ret = WIMLIB_ERR_NOMEM;
742                                 goto out;
743                         }
744                         lte->file_on_disk = file_on_disk;
745                         lte->resource_location = RESOURCE_IN_FILE_ON_DISK;
746                         lte->resource_entry.original_size = root_stbuf.st_size;
747                         lte->resource_entry.size = root_stbuf.st_size;
748                         copy_hash(lte->hash, hash);
749                         lookup_table_insert(lookup_table, lte);
750                 }
751                 root->d_inode->i_lte = lte;
752         } else if (S_ISDIR(root_stbuf.st_mode)) { /* Archiving a directory */
753
754                 inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
755
756                 DIR *dir;
757                 struct dirent entry, *result;
758                 struct wim_dentry *child;
759
760                 dir = opendir(root_disk_path);
761                 if (!dir) {
762                         ERROR_WITH_ERRNO("Failed to open the directory `%s'",
763                                          root_disk_path);
764                         ret = WIMLIB_ERR_OPEN;
765                         goto out;
766                 }
767
768                 /* Buffer for names of files in directory. */
769                 size_t len = strlen(root_disk_path);
770                 char name[len + 1 + FILENAME_MAX + 1];
771                 memcpy(name, root_disk_path, len);
772                 name[len] = '/';
773
774                 /* Create a dentry for each entry in the directory on disk, and recurse
775                  * to any subdirectories. */
776                 while (1) {
777                         errno = 0;
778                         ret = readdir_r(dir, &entry, &result);
779                         if (ret != 0) {
780                                 ret = WIMLIB_ERR_READ;
781                                 ERROR_WITH_ERRNO("Error reading the "
782                                                  "directory `%s'",
783                                                  root_disk_path);
784                                 break;
785                         }
786                         if (result == NULL)
787                                 break;
788                         if (result->d_name[0] == '.' && (result->d_name[1] == '\0'
789                               || (result->d_name[1] == '.' && result->d_name[2] == '\0')))
790                                         continue;
791                         strcpy(name + len + 1, result->d_name);
792                         ret = build_dentry_tree(&child, name, lookup_table,
793                                                 NULL, config, add_image_flags,
794                                                 progress_func, NULL);
795                         if (ret != 0)
796                                 break;
797                         if (child)
798                                 dentry_add_child(root, child);
799                 }
800                 closedir(dir);
801         } else { /* Archiving a symbolic link */
802                 inode->i_attributes = FILE_ATTRIBUTE_REPARSE_POINT;
803                 inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK;
804
805                 /* The idea here is to call readlink() to get the UNIX target of
806                  * the symbolic link, then turn the target into a reparse point
807                  * data buffer that contains a relative or absolute symbolic
808                  * link (NOT a junction point or *full* path symbolic link with
809                  * drive letter).
810                  */
811
812                 char deref_name_buf[4096];
813                 ssize_t deref_name_len;
814
815                 deref_name_len = readlink(root_disk_path, deref_name_buf,
816                                           sizeof(deref_name_buf) - 1);
817                 if (deref_name_len >= 0) {
818                         deref_name_buf[deref_name_len] = '\0';
819                         DEBUG("Read symlink `%s'", deref_name_buf);
820                         ret = inode_set_symlink(root->d_inode, deref_name_buf,
821                                                 lookup_table, NULL);
822                         if (ret == 0) {
823                                 /*
824                                  * Unfortunately, Windows seems to have the
825                                  * concept of "file" symbolic links as being
826                                  * different from "directory" symbolic links...
827                                  * so FILE_ATTRIBUTE_DIRECTORY needs to be set
828                                  * on the symbolic link if the *target* of the
829                                  * symbolic link is a directory.
830                                  */
831                                 struct stat stbuf;
832                                 if (stat(root_disk_path, &stbuf) == 0 &&
833                                     S_ISDIR(stbuf.st_mode))
834                                 {
835                                         inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
836                                 }
837                         }
838                 } else {
839                         ERROR_WITH_ERRNO("Failed to read target of "
840                                          "symbolic link `%s'", root_disk_path);
841                         ret = WIMLIB_ERR_READLINK;
842                 }
843         }
844 #else
845         /* Win32 version of capturing a directory tree */
846
847         wchar_t *path_utf16;
848         size_t path_utf16_nchars;
849         struct sd_set *sd_set;
850         DWORD err;
851
852         if (extra_arg == NULL) {
853                 sd_set = alloca(sizeof(struct sd_set));
854                 sd_set->rb_root.rb_node = NULL,
855                 sd_set->sd = sd;
856         } else {
857                 sd_set = extra_arg;
858         }
859
860         ret = utf8_to_utf16(root_disk_path, strlen(root_disk_path),
861                             (char**)&path_utf16, &path_utf16_nchars);
862         if (ret)
863                 goto out_destroy_sd_set;
864         path_utf16_nchars /= sizeof(wchar_t);
865
866         HANDLE hFile = win32_open_file_readonly(path_utf16);
867         if (hFile == INVALID_HANDLE_VALUE) {
868                 err = GetLastError();
869                 ERROR("Win32 API: Failed to open \"%s\"", root_disk_path);
870                 win32_error(err);
871                 ret = WIMLIB_ERR_OPEN;
872                 goto out_free_path_utf16;
873         }
874
875         BY_HANDLE_FILE_INFORMATION file_info;
876         if (!GetFileInformationByHandle(hFile, &file_info)) {
877                 err = GetLastError();
878                 ERROR("Win32 API: Failed to get file information for \"%s\"",
879                       root_disk_path);
880                 win32_error(err);
881                 ret = WIMLIB_ERR_STAT;
882                 goto out_close_handle;
883         }
884
885         /* Create a WIM dentry */
886         root = new_dentry_with_timeless_inode(path_basename(root_disk_path));
887         if (!root) {
888                 if (errno == EILSEQ)
889                         ret = WIMLIB_ERR_INVALID_UTF8_STRING;
890                 else if (errno == ENOMEM)
891                         ret = WIMLIB_ERR_NOMEM;
892                 else
893                         ret = WIMLIB_ERR_ICONV_NOT_AVAILABLE;
894                 goto out_close_handle;
895         }
896
897         /* Start preparing the associated WIM inode */
898         inode = root->d_inode;
899
900         inode->i_attributes = file_info.dwFileAttributes;
901         inode->i_creation_time = FILETIME_to_u64(&file_info.ftCreationTime);
902         inode->i_last_write_time = FILETIME_to_u64(&file_info.ftLastWriteTime);
903         inode->i_last_access_time = FILETIME_to_u64(&file_info.ftLastAccessTime);
904         inode->i_ino = ((u64)file_info.nFileIndexHigh << 32) |
905                         (u64)file_info.nFileIndexLow;
906
907         inode->i_resolved = 1;
908         add_image_flags &= ~(WIMLIB_ADD_IMAGE_FLAG_ROOT | WIMLIB_ADD_IMAGE_FLAG_SOURCE);
909
910         /* Get DOS name and security descriptor (if any). */
911         ret = win32_get_short_name(root, path_utf16);
912         if (ret)
913                 goto out_close_handle;
914         ret = win32_get_security_descriptor(root, sd_set, path_utf16);
915         if (ret)
916                 goto out_close_handle;
917
918         if (inode_is_directory(inode)) {
919                 /* Directory (not a reparse point) --- recurse to children */
920
921                 /* But first... directories may have alternate data streams that
922                  * need to be captured. */
923                 ret = win32_capture_streams(path_utf16,
924                                             path_utf16_nchars,
925                                             inode,
926                                             lookup_table);
927                 if (ret)
928                         goto out_close_handle;
929                 ret = win32_recurse_directory(root,
930                                               root_disk_path,
931                                               lookup_table,
932                                               sd,
933                                               config,
934                                               add_image_flags,
935                                               progress_func,
936                                               sd_set,
937                                               path_utf16,
938                                               path_utf16_nchars);
939         } else if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
940                 /* Reparse point: save the reparse tag and data */
941                 ret = win32_capture_reparse_point(hFile,
942                                                   inode,
943                                                   lookup_table,
944                                                   root_disk_path);
945         } else {
946                 /* Not a directory, not a reparse point; capture the default
947                  * file contents and any alternate data streams. */
948                 ret = win32_capture_streams(path_utf16,
949                                             path_utf16_nchars,
950                                             inode,
951                                             lookup_table);
952         }
953 out_close_handle:
954         CloseHandle(hFile);
955 out_free_path_utf16:
956         FREE(path_utf16);
957 out_destroy_sd_set:
958         if (extra_arg == NULL)
959                 destroy_sd_set(sd_set);
960 #endif
961         /* The below lines of code are common to both UNIX and Win32 builds.  It
962          * simply returns the captured directory tree if the capture was
963          * successful, or frees it if the capture was unsuccessful. */
964 out:
965         if (ret == 0)
966                 *root_ret = root;
967         else
968                 free_dentry_tree(root, lookup_table);
969         return ret;
970 }
971
/* Identifies which section of a capture configuration file a pattern line
 * belongs to.  NONE is the state before any section header has been seen. */
enum pattern_type {
        NONE                       = 0, /* not inside any section */
        EXCLUSION_LIST,                 /* [ExclusionList] */
        EXCLUSION_EXCEPTION,            /* [ExclusionException] */
        COMPRESSION_EXCLUSION_LIST,     /* [CompressionExclusionList] */
        ALIGNMENT_LIST,                 /* [AlignmentList] */
};
979
/* When defined, the library keeps its historical built-in capture policy.
 * XXX: This policy is being moved to library users.  The next
 * ABI-incompatible library version will default to the empty string. */
#define COMPAT_DEFAULT_CONFIG

/* Default capture configuration file when none is specified. */
#ifdef COMPAT_DEFAULT_CONFIG
static const char *default_config =
        "[ExclusionList]\n"
        "\\$ntfs.log\n"
        "\\hiberfil.sys\n"
        "\\pagefile.sys\n"
        "\\System Volume Information\n"
        "\\RECYCLER\n"
        "\\Windows\\CSC\n"
        "\n"
        "[CompressionExclusionList]\n"
        "*.mp3\n"
        "*.zip\n"
        "*.cab\n"
        "\\WINDOWS\\inf\\*.pnf\n";
#else
static const char *default_config = "";
#endif
1003
1004 static void destroy_pattern_list(struct pattern_list *list)
1005 {
1006         FREE(list->pats);
1007 }
1008
1009 static void destroy_capture_config(struct capture_config *config)
1010 {
1011         destroy_pattern_list(&config->exclusion_list);
1012         destroy_pattern_list(&config->exclusion_exception);
1013         destroy_pattern_list(&config->compression_exclusion_list);
1014         destroy_pattern_list(&config->alignment_list);
1015         FREE(config->config_str);
1016         FREE(config->prefix);
1017         memset(config, 0, sizeof(*config));
1018 }
1019
1020 static int pattern_list_add_pattern(struct pattern_list *list,
1021                                     const char *pattern)
1022 {
1023         const char **pats;
1024         if (list->num_pats >= list->num_allocated_pats) {
1025                 pats = REALLOC(list->pats,
1026                                sizeof(list->pats[0]) * (list->num_allocated_pats + 8));
1027                 if (!pats)
1028                         return WIMLIB_ERR_NOMEM;
1029                 list->num_allocated_pats += 8;
1030                 list->pats = pats;
1031         }
1032         list->pats[list->num_pats++] = pattern;
1033         return 0;
1034 }
1035
1036 /* Parses the contents of the image capture configuration file and fills in a
1037  * `struct capture_config'. */
1038 static int init_capture_config(struct capture_config *config,
1039                                const char *_config_str, size_t config_len)
1040 {
1041         char *config_str;
1042         char *p;
1043         char *eol;
1044         char *next_p;
1045         size_t bytes_remaining;
1046         enum pattern_type type = NONE;
1047         int ret;
1048         unsigned long line_no = 0;
1049
1050         DEBUG("config_len = %zu", config_len);
1051         bytes_remaining = config_len;
1052         memset(config, 0, sizeof(*config));
1053         config_str = MALLOC(config_len);
1054         if (!config_str) {
1055                 ERROR("Could not duplicate capture config string");
1056                 return WIMLIB_ERR_NOMEM;
1057         }
1058
1059         memcpy(config_str, _config_str, config_len);
1060         next_p = config_str;
1061         config->config_str = config_str;
1062         while (bytes_remaining) {
1063                 line_no++;
1064                 p = next_p;
1065                 eol = memchr(p, '\n', bytes_remaining);
1066                 if (!eol) {
1067                         ERROR("Expected end-of-line in capture config file on "
1068                               "line %lu", line_no);
1069                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
1070                         goto out_destroy;
1071                 }
1072
1073                 next_p = eol + 1;
1074                 bytes_remaining -= (next_p - p);
1075                 if (eol == p)
1076                         continue;
1077
1078                 if (*(eol - 1) == '\r')
1079                         eol--;
1080                 *eol = '\0';
1081
1082                 /* Translate backslash to forward slash */
1083                 for (char *pp = p; pp != eol; pp++)
1084                         if (*pp == '\\')
1085                                 *pp = '/';
1086
1087                 /* Remove drive letter */
1088                 if (eol - p > 2 && isalpha(*p) && *(p + 1) == ':')
1089                         p += 2;
1090
1091                 ret = 0;
1092                 if (strcmp(p, "[ExclusionList]") == 0)
1093                         type = EXCLUSION_LIST;
1094                 else if (strcmp(p, "[ExclusionException]") == 0)
1095                         type = EXCLUSION_EXCEPTION;
1096                 else if (strcmp(p, "[CompressionExclusionList]") == 0)
1097                         type = COMPRESSION_EXCLUSION_LIST;
1098                 else if (strcmp(p, "[AlignmentList]") == 0)
1099                         type = ALIGNMENT_LIST;
1100                 else if (p[0] == '[' && strrchr(p, ']')) {
1101                         ERROR("Unknown capture configuration section `%s'", p);
1102                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
1103                 } else switch (type) {
1104                 case EXCLUSION_LIST:
1105                         DEBUG("Adding pattern \"%s\" to exclusion list", p);
1106                         ret = pattern_list_add_pattern(&config->exclusion_list, p);
1107                         break;
1108                 case EXCLUSION_EXCEPTION:
1109                         DEBUG("Adding pattern \"%s\" to exclusion exception list", p);
1110                         ret = pattern_list_add_pattern(&config->exclusion_exception, p);
1111                         break;
1112                 case COMPRESSION_EXCLUSION_LIST:
1113                         DEBUG("Adding pattern \"%s\" to compression exclusion list", p);
1114                         ret = pattern_list_add_pattern(&config->compression_exclusion_list, p);
1115                         break;
1116                 case ALIGNMENT_LIST:
1117                         DEBUG("Adding pattern \"%s\" to alignment list", p);
1118                         ret = pattern_list_add_pattern(&config->alignment_list, p);
1119                         break;
1120                 default:
1121                         ERROR("Line %lu of capture configuration is not "
1122                               "in a block (such as [ExclusionList])",
1123                               line_no);
1124                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
1125                         break;
1126                 }
1127                 if (ret != 0)
1128                         goto out_destroy;
1129         }
1130         return 0;
1131 out_destroy:
1132         destroy_capture_config(config);
1133         return ret;
1134 }
1135
1136 static int capture_config_set_prefix(struct capture_config *config,
1137                                      const char *_prefix)
1138 {
1139         char *prefix = STRDUP(_prefix);
1140
1141         if (!prefix)
1142                 return WIMLIB_ERR_NOMEM;
1143         FREE(config->prefix);
1144         config->prefix = prefix;
1145         config->prefix_len = strlen(prefix);
1146         return 0;
1147 }
1148
/* Returns true if @path matches the wildcard @pattern.
 *
 * Native Windows builds delegate to PathMatchSpecA().  All other builds use
 * fnmatch() with FNM_PATHNAME, adding FNM_CASEFOLD when the C library
 * provides it. */
static bool path_matches_pattern(const char *path, const char *pattern)
{
#ifdef __WIN32__
        return PathMatchSpecA(path, pattern);
#else
        int flags = FNM_PATHNAME;

#ifdef FNM_CASEFOLD
        flags |= FNM_CASEFOLD;
#endif
        return fnmatch(pattern, path, flags) == 0;
#endif
}
1161
1162 static bool match_pattern(const char *path, const char *path_basename,
1163                           const struct pattern_list *list)
1164 {
1165         for (size_t i = 0; i < list->num_pats; i++) {
1166                 const char *pat = list->pats[i];
1167                 const char *string;
1168                 if (pat[0] == '/')
1169                         /* Absolute path from root of capture */
1170                         string = path;
1171                 else {
1172                         if (strchr(pat, '/'))
1173                                 /* Relative path from root of capture */
1174                                 string = path + 1;
1175                         else
1176                                 /* A file name pattern */
1177                                 string = path_basename;
1178                 }
1179
1180                 if (path_matches_pattern(string, pat)) {
1181                         DEBUG("`%s' matches the pattern \"%s\"",
1182                               string, pat);
1183                         return true;
1184                 }
1185         }
1186         return false;
1187 }
1188
1189 /* Return true if the image capture configuration file indicates we should
1190  * exclude the filename @path from capture.
1191  *
1192  * If @exclude_prefix is %true, the part of the path up and including the name
1193  * of the directory being captured is not included in the path for matching
1194  * purposes.  This allows, for example, a pattern like /hiberfil.sys to match a
1195  * file /mnt/windows7/hiberfil.sys if we are capturing the /mnt/windows7
1196  * directory.
1197  */
1198 bool exclude_path(const char *path, const struct capture_config *config,
1199                   bool exclude_prefix)
1200 {
1201         const char *basename = path_basename(path);
1202         if (exclude_prefix) {
1203                 wimlib_assert(strlen(path) >= config->prefix_len);
1204                 if (memcmp(config->prefix, path, config->prefix_len) == 0
1205                      && path[config->prefix_len] == '/')
1206                         path += config->prefix_len;
1207         }
1208         return match_pattern(path, basename, &config->exclusion_list) &&
1209                 !match_pattern(path, basename, &config->exclusion_exception);
1210
1211 }
1212
/* Removes leading and trailing forward slashes from @target_path.
 *
 * The string is edited in place: trailing slashes are overwritten with null
 * bytes, and the returned pointer refers to the first non-slash character
 * inside the caller's buffer.  A NULL argument yields the constant empty
 * string "". */
static const char *canonicalize_target_path(char *target_path)
{
        char *start = target_path;
        char *end;

        if (start == NULL)
                return "";

        /* Step over every leading slash. */
        while (*start == '/')
                start++;

        /* Nothing (or nothing but slashes) in the string. */
        if (*start == '\0')
                return start;

        /* *start is not a slash, so the backwards scan is guaranteed to stop
         * inside the string. */
        end = start + strlen(start);
        while (end[-1] == '/')
                *--end = '\0';

        return start;
}
1234
1235 #if defined(__CYGWIN__) || defined(__WIN32__)
/* Rewrites every backslash in the null-terminated string @s to a forward
 * slash, in place. */
static void zap_backslashes(char *s)
{
        for (char *cur = s; *cur != '\0'; cur++) {
                if (*cur == '\\')
                        *cur = '/';
        }
}
1244 #endif
1245
1246 /* Strip leading and trailing slashes from the target paths */
1247 static void canonicalize_targets(struct wimlib_capture_source *sources,
1248                                  size_t num_sources)
1249 {
1250         while (num_sources--) {
1251                 DEBUG("Canonicalizing { source: \"%s\", target=\"%s\"}",
1252                       sources->fs_source_path,
1253                       sources->wim_target_path);
1254 #if defined(__CYGWIN__) || defined(__WIN32__)
1255                 /* The Windows API can handle forward slashes.  Just get rid of
1256                  * backslashes to avoid confusing other parts of the library
1257                  * code. */
1258                 zap_backslashes(sources->fs_source_path);
1259                 if (sources->wim_target_path)
1260                         zap_backslashes(sources->wim_target_path);
1261 #endif
1262                 sources->wim_target_path =
1263                         (char*)canonicalize_target_path(sources->wim_target_path);
1264                 DEBUG("Canonical target: \"%s\"", sources->wim_target_path);
1265                 sources++;
1266         }
1267 }
1268
1269 static int capture_source_cmp(const void *p1, const void *p2)
1270 {
1271         const struct wimlib_capture_source *s1 = p1, *s2 = p2;
1272         return strcmp(s1->wim_target_path, s2->wim_target_path);
1273 }
1274
1275 /* Sorts the capture sources lexicographically by target path.  This occurs
1276  * after leading and trailing forward slashes are stripped.
1277  *
1278  * One purpose of this is to make sure that target paths that are inside other
1279  * target paths are added after the containing target paths. */
1280 static void sort_sources(struct wimlib_capture_source *sources,
1281                          size_t num_sources)
1282 {
1283         qsort(sources, num_sources, sizeof(sources[0]), capture_source_cmp);
1284 }
1285
1286 static int check_sorted_sources(struct wimlib_capture_source *sources,
1287                                 size_t num_sources, int add_image_flags)
1288 {
1289         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
1290                 if (num_sources != 1) {
1291                         ERROR("Must specify exactly 1 capture source "
1292                               "(the NTFS volume) in NTFS mode!");
1293                         return WIMLIB_ERR_INVALID_PARAM;
1294                 }
1295                 if (sources[0].wim_target_path[0] != '\0') {
1296                         ERROR("In NTFS capture mode the target path inside "
1297                               "the image must be the root directory!");
1298                         return WIMLIB_ERR_INVALID_PARAM;
1299                 }
1300         } else if (num_sources != 0) {
1301                 /* This code is disabled because the current code
1302                  * unconditionally attempts to do overlays.  So, duplicate
1303                  * target paths are OK. */
1304         #if 0
1305                 if (num_sources > 1 && sources[0].wim_target_path[0] == '\0') {
1306                         ERROR("Cannot specify root target when using multiple "
1307                               "capture sources!");
1308                         return WIMLIB_ERR_INVALID_PARAM;
1309                 }
1310                 for (size_t i = 0; i < num_sources - 1; i++) {
1311                         size_t len = strlen(sources[i].wim_target_path);
1312                         size_t j = i + 1;
1313                         const char *target1 = sources[i].wim_target_path;
1314                         do {
1315                                 const char *target2 = sources[j].wim_target_path;
1316                                 DEBUG("target1=%s, target2=%s",
1317                                       target1,target2);
1318                                 if (strncmp(target1, target2, len) ||
1319                                     target2[len] > '/')
1320                                         break;
1321                                 if (target2[len] == '/') {
1322                                         ERROR("Invalid target `%s': is a prefix of `%s'",
1323                                               target1, target2);
1324                                         return WIMLIB_ERR_INVALID_PARAM;
1325                                 }
1326                                 if (target2[len] == '\0') {
1327                                         ERROR("Invalid target `%s': is a duplicate of `%s'",
1328                                               target1, target2);
1329                                         return WIMLIB_ERR_INVALID_PARAM;
1330                                 }
1331                         } while (++j != num_sources);
1332                 }
1333         #endif
1334         }
1335         return 0;
1336
1337 }
1338
1339 /* Creates a new directory to place in the WIM image.  This is to create parent
1340  * directories that are not part of any target as needed.  */
1341 static struct wim_dentry *
1342 new_filler_directory(const char *name)
1343 {
1344         struct wim_dentry *dentry;
1345         DEBUG("Creating filler directory \"%s\"", name);
1346         dentry = new_dentry_with_inode(name);
1347         if (dentry) {
1348                 /* Leave the inode number as 0 for now.  The final inode number
1349                  * will be assigned later by assign_inode_numbers(). */
1350                 dentry->d_inode->i_resolved = 1;
1351                 dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
1352         }
1353         return dentry;
1354 }
1355
1356 /* Transfers the children of @branch to @target.  It is an error if @target is
1357  * not a directory or if both @branch and @target contain a child dentry with
1358  * the same name. */
1359 static int do_overlay(struct wim_dentry *target, struct wim_dentry *branch)
1360 {
1361         struct rb_root *rb_root;
1362
1363         DEBUG("Doing overlay %s => %s",
1364               branch->file_name_utf8, target->file_name_utf8);
1365
1366         if (!dentry_is_directory(target)) {
1367                 ERROR("Cannot overlay directory `%s' over non-directory",
1368                       branch->file_name_utf8);
1369                 return WIMLIB_ERR_INVALID_OVERLAY;
1370         }
1371
1372         rb_root = &branch->d_inode->i_children;
1373         while (rb_root->rb_node) { /* While @branch has children... */
1374                 struct wim_dentry *child = rbnode_dentry(rb_root->rb_node);
1375                 /* Move @child to the directory @target */
1376                 unlink_dentry(child);
1377                 if (!dentry_add_child(target, child)) {
1378                         /* Revert the change to avoid leaking the directory tree
1379                          * rooted at @child */
1380                         dentry_add_child(branch, child);
1381                         ERROR("Overlay error: file `%s' already exists "
1382                               "as a child of `%s'",
1383                               child->file_name_utf8, target->file_name_utf8);
1384                         return WIMLIB_ERR_INVALID_OVERLAY;
1385                 }
1386         }
1387         free_dentry(branch);
1388         return 0;
1389
1390 }
1391
1392 /* Attach or overlay a branch onto the WIM image.
1393  *
1394  * @root_p:
1395  *      Pointer to the root of the WIM image, or pointer to NULL if it has not
1396  *      been created yet.
1397  * @branch
1398  *      Branch to add.
1399  * @target_path:
1400  *      Path in the WIM image to add the branch, with leading and trailing
1401  *      slashes stripped.
1402  */
1403 static int attach_branch(struct wim_dentry **root_p,
1404                          struct wim_dentry *branch,
1405                          char *target_path)
1406 {
1407         char *slash;
1408         struct wim_dentry *dentry, *parent, *target;
1409
1410         DEBUG("Attaching branch \"%s\" => \"%s\"",
1411               branch->file_name_utf8, target_path);
1412
1413         if (*target_path == '\0') {
1414                 /* Target: root directory */
1415                 if (*root_p) {
1416                         /* Overlay on existing root */
1417                         return do_overlay(*root_p, branch);
1418                 } else  {
1419                         /* Set as root */
1420                         *root_p = branch;
1421                         return 0;
1422                 }
1423         }
1424
1425         /* Adding a non-root branch.  Create root if it hasn't been created
1426          * already. */
1427         if (!*root_p) {
1428                 *root_p = new_filler_directory("");
1429                 if (!*root_p)
1430                         return WIMLIB_ERR_NOMEM;
1431         }
1432
1433         /* Walk the path to the branch, creating filler directories as needed.
1434          * */
1435         parent = *root_p;
1436         while ((slash = strchr(target_path, '/'))) {
1437                 *slash = '\0';
1438                 dentry = get_dentry_child_with_name(parent, target_path);
1439                 if (!dentry) {
1440                         dentry = new_filler_directory(target_path);
1441                         if (!dentry)
1442                                 return WIMLIB_ERR_NOMEM;
1443                         dentry_add_child(parent, dentry);
1444                 }
1445                 parent = dentry;
1446                 target_path = slash;
1447                 /* Skip over slashes.  Note: this cannot overrun the length of
1448                  * the string because the last character cannot be a slash, as
1449                  * trailing slashes were tripped.  */
1450                 do {
1451                         ++target_path;
1452                 } while (*target_path == '/');
1453         }
1454
1455         /* If the target path already existed, overlay the branch onto it.
1456          * Otherwise, set the branch as the target path. */
1457         target = get_dentry_child_with_name(parent, branch->file_name_utf8);
1458         if (target) {
1459                 return do_overlay(target, branch);
1460         } else {
1461                 dentry_add_child(parent, branch);
1462                 return 0;
1463         }
1464 }
1465
1466 WIMLIBAPI int wimlib_add_image_multisource(WIMStruct *w,
1467                                            struct wimlib_capture_source *sources,
1468                                            size_t num_sources,
1469                                            const char *name,
1470                                            const char *config_str,
1471                                            size_t config_len,
1472                                            int add_image_flags,
1473                                            wimlib_progress_func_t progress_func)
1474 {
1475         int (*capture_tree)(struct wim_dentry **, const char *,
1476                             struct wim_lookup_table *,
1477                             struct wim_security_data *,
1478                             const struct capture_config *,
1479                             int, wimlib_progress_func_t, void *);
1480         void *extra_arg;
1481         struct wim_dentry *root_dentry;
1482         struct wim_dentry *branch;
1483         struct wim_security_data *sd;
1484         struct capture_config config;
1485         struct wim_image_metadata *imd;
1486         int ret;
1487
1488         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
1489 #ifdef WITH_NTFS_3G
1490                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
1491                         ERROR("Cannot dereference files when capturing directly from NTFS");
1492                         return WIMLIB_ERR_INVALID_PARAM;
1493                 }
1494                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
1495                         ERROR("Capturing UNIX owner and mode not supported "
1496                               "when capturing directly from NTFS");
1497                         return WIMLIB_ERR_INVALID_PARAM;
1498                 }
1499                 capture_tree = build_dentry_tree_ntfs;
1500                 extra_arg = &w->ntfs_vol;
1501 #else
1502                 ERROR("wimlib was compiled without support for NTFS-3g, so\n"
1503                       "        cannot capture a WIM image directly from a NTFS volume!");
1504                 return WIMLIB_ERR_UNSUPPORTED;
1505 #endif
1506         } else {
1507                 capture_tree = build_dentry_tree;
1508                 extra_arg = NULL;
1509         }
1510
1511 #if defined(__CYGWIN__) || defined(__WIN32__)
1512         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
1513                 ERROR("Capturing UNIX-specific data is not supported on Windows");
1514                 return WIMLIB_ERR_INVALID_PARAM;
1515         }
1516         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
1517                 ERROR("Dereferencing symbolic links is not supported on Windows");
1518                 return WIMLIB_ERR_INVALID_PARAM;
1519         }
1520 #endif
1521
1522         if (!name || !*name) {
1523                 ERROR("Must specify a non-empty string for the image name");
1524                 return WIMLIB_ERR_INVALID_PARAM;
1525         }
1526
1527         if (w->hdr.total_parts != 1) {
1528                 ERROR("Cannot add an image to a split WIM");
1529                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
1530         }
1531
1532         if (wimlib_image_name_in_use(w, name)) {
1533                 ERROR("There is already an image named \"%s\" in `%s'",
1534                       name, w->filename);
1535                 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
1536         }
1537
1538         if (!config_str) {
1539                 DEBUG("Using default capture configuration");
1540                 config_str = default_config;
1541                 config_len = strlen(default_config);
1542         }
1543         ret = init_capture_config(&config, config_str, config_len);
1544         if (ret)
1545                 goto out;
1546
1547         DEBUG("Allocating security data");
1548         sd = CALLOC(1, sizeof(struct wim_security_data));
1549         if (!sd) {
1550                 ret = WIMLIB_ERR_NOMEM;
1551                 goto out_destroy_capture_config;
1552         }
1553         sd->total_length = 8;
1554         sd->refcnt = 1;
1555
1556         DEBUG("Using %zu capture sources", num_sources);
1557         canonicalize_targets(sources, num_sources);
1558         sort_sources(sources, num_sources);
1559         ret = check_sorted_sources(sources, num_sources, add_image_flags);
1560         if (ret) {
1561                 ret = WIMLIB_ERR_INVALID_PARAM;
1562                 goto out_free_security_data;
1563         }
1564
1565         DEBUG("Building dentry tree.");
1566         if (num_sources == 0) {
1567                 root_dentry = new_filler_directory("");
1568                 if (!root_dentry) {
1569                         ret = WIMLIB_ERR_NOMEM;
1570                         goto out_free_security_data;
1571                 }
1572         } else {
1573                 size_t i;
1574
1575 #if defined(__CYGWIN__) || defined(__WIN32__)
1576                 win32_acquire_privilege(SE_BACKUP_NAME);
1577                 win32_acquire_privilege(SE_SECURITY_NAME);
1578                 win32_acquire_privilege(SE_TAKE_OWNERSHIP_NAME);
1579 #endif
1580                 root_dentry = NULL;
1581                 i = 0;
1582                 do {
1583                         int flags;
1584                         union wimlib_progress_info progress;
1585
1586                         DEBUG("Building dentry tree for source %zu of %zu "
1587                               "(\"%s\" => \"%s\")", i + 1, num_sources,
1588                               sources[i].fs_source_path,
1589                               sources[i].wim_target_path);
1590                         if (progress_func) {
1591                                 memset(&progress, 0, sizeof(progress));
1592                                 progress.scan.source = sources[i].fs_source_path;
1593                                 progress.scan.wim_target_path = sources[i].wim_target_path;
1594                                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_BEGIN, &progress);
1595                         }
1596                         ret = capture_config_set_prefix(&config,
1597                                                         sources[i].fs_source_path);
1598                         if (ret)
1599                                 goto out_free_dentry_tree;
1600                         flags = add_image_flags | WIMLIB_ADD_IMAGE_FLAG_SOURCE;
1601                         if (!*sources[i].wim_target_path)
1602                                 flags |= WIMLIB_ADD_IMAGE_FLAG_ROOT;
1603                         ret = (*capture_tree)(&branch, sources[i].fs_source_path,
1604                                               w->lookup_table, sd,
1605                                               &config,
1606                                               flags,
1607                                               progress_func, extra_arg);
1608                         if (ret) {
1609                                 ERROR("Failed to build dentry tree for `%s'",
1610                                       sources[i].fs_source_path);
1611                                 goto out_free_dentry_tree;
1612                         }
1613                         if (branch) {
1614                                 /* Use the target name, not the source name, for
1615                                  * the root of each branch from a capture
1616                                  * source.  (This will also set the root dentry
1617                                  * of the entire image to be unnamed.) */
1618                                 ret = set_dentry_name(branch,
1619                                                       path_basename(sources[i].wim_target_path));
1620                                 if (ret)
1621                                         goto out_free_branch;
1622
1623                                 ret = attach_branch(&root_dentry, branch,
1624                                                     sources[i].wim_target_path);
1625                                 if (ret)
1626                                         goto out_free_branch;
1627                         }
1628                         if (progress_func)
1629                                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_END, &progress);
1630                 } while (++i != num_sources);
1631         }
1632
1633         DEBUG("Calculating full paths of dentries.");
1634         ret = for_dentry_in_tree(root_dentry, calculate_dentry_full_path, NULL);
1635         if (ret != 0)
1636                 goto out_free_dentry_tree;
1637
1638         ret = add_new_dentry_tree(w, root_dentry, sd);
1639         if (ret != 0)
1640                 goto out_free_dentry_tree;
1641
1642         imd = &w->image_metadata[w->hdr.image_count - 1];
1643
1644         ret = dentry_tree_fix_inodes(root_dentry, &imd->inode_list);
1645         if (ret != 0)
1646                 goto out_destroy_imd;
1647
1648         DEBUG("Assigning hard link group IDs");
1649         assign_inode_numbers(&imd->inode_list);
1650
1651         ret = xml_add_image(w, name);
1652         if (ret != 0)
1653                 goto out_destroy_imd;
1654
1655         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_BOOT)
1656                 wimlib_set_boot_idx(w, w->hdr.image_count);
1657         ret = 0;
1658         goto out_destroy_capture_config;
1659 out_destroy_imd:
1660         destroy_image_metadata(&w->image_metadata[w->hdr.image_count - 1],
1661                                w->lookup_table);
1662         w->hdr.image_count--;
1663         goto out;
1664 out_free_branch:
1665         free_dentry_tree(branch, w->lookup_table);
1666 out_free_dentry_tree:
1667         free_dentry_tree(root_dentry, w->lookup_table);
1668 out_free_security_data:
1669         free_security_data(sd);
1670 out_destroy_capture_config:
1671         destroy_capture_config(&config);
1672 out:
1673 #if defined(__CYGWIN__) || defined(__WIN32__)
1674         win32_release_privilege(SE_BACKUP_NAME);
1675         win32_release_privilege(SE_SECURITY_NAME);
1676         win32_release_privilege(SE_TAKE_OWNERSHIP_NAME);
1677 #endif
1678         return ret;
1679 }
1680
1681 WIMLIBAPI int wimlib_add_image(WIMStruct *w, const char *source,
1682                                const char *name, const char *config_str,
1683                                size_t config_len, int add_image_flags,
1684                                wimlib_progress_func_t progress_func)
1685 {
1686         if (!source || !*source)
1687                 return WIMLIB_ERR_INVALID_PARAM;
1688
1689         char *fs_source_path = STRDUP(source);
1690         int ret;
1691         struct wimlib_capture_source capture_src = {
1692                 .fs_source_path = fs_source_path,
1693                 .wim_target_path = NULL,
1694                 .reserved = 0,
1695         };
1696         ret = wimlib_add_image_multisource(w, &capture_src, 1, name,
1697                                            config_str, config_len,
1698                                            add_image_flags, progress_func);
1699         FREE(fs_source_path);
1700         return ret;
1701 }