]> wimlib.net Git - wimlib/blob - src/add_image.c
b5f8c06956e57e14172181da98eb1424cf2c98ca
[wimlib] / src / add_image.c
1 /*
2  * add_image.c
3  */
4
5 /*
6  * Copyright (C) 2012, 2013 Eric Biggers
7  *
8  * This file is part of wimlib, a library for working with WIM files.
9  *
10  * wimlib is free software; you can redistribute it and/or modify it under the
11  * terms of the GNU General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option)
13  * any later version.
14  *
15  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17  * A PARTICULAR PURPOSE. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with wimlib; if not, see http://www.gnu.org/licenses/.
22  */
23
24 #include "config.h"
25
26 #if defined(__CYGWIN__) || defined(__WIN32__)
27 #       include <windows.h>
28 #       include <ntdef.h>
29 #       include <wchar.h>
30 #       ifdef ERROR
31 #               undef ERROR
32 #       endif
33 #       include "security.h"
34 #else
35 #       include <dirent.h>
36 #       include <sys/stat.h>
37 #       include "timestamp.h"
38 #endif
39
40 #include "wimlib_internal.h"
41 #include "dentry.h"
42 #include "lookup_table.h"
43 #include "xml.h"
44 #include <ctype.h>
45 #include <errno.h>
46 #include <fnmatch.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50
51 #ifdef HAVE_ALLOCA_H
52 #include <alloca.h>
53 #endif
54
55 #define WIMLIB_ADD_IMAGE_FLAG_ROOT      0x80000000
56 #define WIMLIB_ADD_IMAGE_FLAG_SOURCE    0x40000000
57
58 /*
59  * Adds the dentry tree and security data for a new image to the image metadata
60  * array of the WIMStruct.
61  */
62 int add_new_dentry_tree(WIMStruct *w, struct wim_dentry *root_dentry,
63                         struct wim_security_data *sd)
64 {
65         struct wim_lookup_table_entry *metadata_lte;
66         struct wim_image_metadata *imd;
67         struct wim_image_metadata *new_imd;
68
69         wimlib_assert(root_dentry != NULL);
70
71         DEBUG("Reallocating image metadata array for image_count = %u",
72               w->hdr.image_count + 1);
73         imd = CALLOC((w->hdr.image_count + 1), sizeof(struct wim_image_metadata));
74
75         if (!imd) {
76                 ERROR("Failed to allocate memory for new image metadata array");
77                 goto err;
78         }
79
80         memcpy(imd, w->image_metadata,
81                w->hdr.image_count * sizeof(struct wim_image_metadata));
82
83         metadata_lte = new_lookup_table_entry();
84         if (!metadata_lte)
85                 goto err_free_imd;
86
87         metadata_lte->resource_entry.flags = WIM_RESHDR_FLAG_METADATA;
88         random_hash(metadata_lte->hash);
89         lookup_table_insert(w->lookup_table, metadata_lte);
90
91         new_imd = &imd[w->hdr.image_count];
92
93         new_imd->root_dentry    = root_dentry;
94         new_imd->metadata_lte   = metadata_lte;
95         new_imd->security_data  = sd;
96         new_imd->modified       = 1;
97
98         FREE(w->image_metadata);
99         w->image_metadata = imd;
100         w->hdr.image_count++;
101         return 0;
102 err_free_imd:
103         FREE(imd);
104 err:
105         return WIMLIB_ERR_NOMEM;
106
107 }
108
109 #if defined(__CYGWIN__) || defined(__WIN32__)
110
111 static u64 FILETIME_to_u64(const FILETIME *ft)
112 {
113         return ((u64)ft->dwHighDateTime << 32) | (u64)ft->dwLowDateTime;
114 }
115
116
117 static int build_dentry_tree(struct wim_dentry **root_ret,
118                              const char *root_disk_path,
119                              struct wim_lookup_table *lookup_table,
120                              struct wim_security_data *sd,
121                              const struct capture_config *config,
122                              int add_image_flags,
123                              wimlib_progress_func_t progress_func,
124                              void *extra_arg);
125
126 static int win32_get_short_name(struct wim_dentry *dentry,
127                                 const wchar_t *path_utf16)
128 {
129         WIN32_FIND_DATAW dat;
130         if (FindFirstFileW(path_utf16, &dat) &&
131             dat.cAlternateFileName[0] != L'\0')
132         {
133                 size_t short_name_len = wcslen(dat.cAlternateFileName) * 2;
134                 size_t n = short_name_len + sizeof(wchar_t);
135                 dentry->short_name = MALLOC(n);
136                 if (!dentry->short_name)
137                         return WIMLIB_ERR_NOMEM;
138                 memcpy(dentry->short_name, dat.cAlternateFileName, n);
139                 dentry->short_name_len = short_name_len;
140         }
141         return 0;
142 }
143
144 static int win32_get_security_descriptor(struct wim_dentry *dentry,
145                                          struct sd_set *sd_set,
146                                          const wchar_t *path_utf16)
147 {
148         SECURITY_INFORMATION requestedInformation;
149         DWORD lenNeeded = 0;
150         BOOL status;
151         DWORD err;
152
153         requestedInformation = DACL_SECURITY_INFORMATION |
154                                SACL_SECURITY_INFORMATION |
155                                OWNER_SECURITY_INFORMATION |
156                                GROUP_SECURITY_INFORMATION;
157         /* Request length of security descriptor */
158         status = GetFileSecurityW(path_utf16, requestedInformation,
159                                   NULL, 0, &lenNeeded);
160         err = GetLastError();
161         if (!status && err == ERROR_INSUFFICIENT_BUFFER) {
162                 DWORD len = lenNeeded;
163                 char buf[len];
164                 if (GetFileSecurityW(path_utf16, requestedInformation,
165                                      buf, len, &lenNeeded))
166                 {
167                         int security_id = sd_set_add_sd(sd_set, buf, len);
168                         if (security_id < 0)
169                                 return WIMLIB_ERR_NOMEM;
170                         else {
171                                 dentry->d_inode->i_security_id = security_id;
172                                 return 0;
173                         }
174                 } else {
175                         err = GetLastError();
176                 }
177         }
178         ERROR("Win32 API: Failed to read security descriptor of \"%ls\"",
179               path_utf16);
180         win32_error(err);
181         return WIMLIB_ERR_READ;
182 }
183
184 /* Reads the directory entries of directory using a Win32 API and recursively
185  * calls build_dentry_tree() on them. */
186 static int win32_recurse_directory(struct wim_dentry *root,
187                                    const char *root_disk_path,
188                                    struct wim_lookup_table *lookup_table,
189                                    struct wim_security_data *sd,
190                                    const struct capture_config *config,
191                                    int add_image_flags,
192                                    wimlib_progress_func_t progress_func,
193                                    struct sd_set *sd_set,
194                                    const wchar_t *path_utf16,
195                                    size_t path_utf16_nchars)
196 {
197         WIN32_FIND_DATAW dat;
198         HANDLE hFind;
199         DWORD err;
200         int ret;
201
202         {
203                 /* Begin reading the directory by calling FindFirstFileW.
204                  * Unlike UNIX opendir(), FindFirstFileW has file globbing built
205                  * into it.  But this isn't what we actually want, so just add a
206                  * dummy glob to get all entries. */
207                 wchar_t pattern_buf[path_utf16_nchars + 3];
208                 memcpy(pattern_buf, path_utf16,
209                        path_utf16_nchars * sizeof(wchar_t));
210                 pattern_buf[path_utf16_nchars] = L'/';
211                 pattern_buf[path_utf16_nchars + 1] = L'*';
212                 pattern_buf[path_utf16_nchars + 2] = L'\0';
213                 hFind = FindFirstFileW(pattern_buf, &dat);
214         }
215         if (hFind == INVALID_HANDLE_VALUE) {
216                 err = GetLastError();
217                 if (err == ERROR_FILE_NOT_FOUND) {
218                         return 0;
219                 } else {
220                         ERROR("Win32 API: Failed to read directory \"%s\"",
221                               root_disk_path);
222                         win32_error(err);
223                         return WIMLIB_ERR_READ;
224                 }
225         }
226         ret = 0;
227         do {
228                 /* Skip . and .. entries */
229                 if (!(dat.cFileName[0] == L'.' &&
230                       (dat.cFileName[1] == L'\0' ||
231                        (dat.cFileName[1] == L'.' && dat.cFileName[2] == L'\0'))))
232                 {
233                         struct wim_dentry *child;
234
235                         char *utf8_name;
236                         size_t utf8_name_nbytes;
237                         ret = utf16_to_utf8((const char*)dat.cFileName,
238                                             wcslen(dat.cFileName) * sizeof(wchar_t),
239                                             &utf8_name,
240                                             &utf8_name_nbytes);
241                         if (ret)
242                                 goto out_find_close;
243
244                         char name[strlen(root_disk_path) + 1 + utf8_name_nbytes + 1];
245                         sprintf(name, "%s/%s", root_disk_path, utf8_name);
246                         FREE(utf8_name);
247                         ret = build_dentry_tree(&child, name, lookup_table,
248                                                 sd, config, add_image_flags,
249                                                 progress_func, sd_set);
250                         if (ret)
251                                 goto out_find_close;
252                         if (child)
253                                 dentry_add_child(root, child);
254                 }
255         } while (FindNextFileW(hFind, &dat));
256         err = GetLastError();
257         if (err != ERROR_NO_MORE_FILES) {
258                 ERROR("Win32 API: Failed to read directory \"%s\"", root_disk_path);
259                 win32_error(err);
260                 if (ret == 0)
261                         ret = WIMLIB_ERR_READ;
262         }
263 out_find_close:
264         FindClose(hFind);
265         return ret;
266 }
267
268 /* Load a reparse point into a WIM inode.  It is just stored in memory.
269  *
270  * @hFile:  Open handle to a reparse point, with permission to read the reparse
271  *          data.
272  *
273  * @inode:  WIM inode for the reparse point.
274  *
275  * @lookup_table:  Stream lookup table for the WIM; an entry will be added to it
276  *                 for the reparse point unless an entry already exists for
277  *                 the exact same data stream.
278  *
279  * @path:  External path to the parse point (UTF-8).  Used for error messages
280  *         only.
281  *
282  * Returns 0 on success; nonzero on failure. */
283 static int win32_capture_reparse_point(HANDLE hFile,
284                                        struct wim_inode *inode,
285                                        struct wim_lookup_table *lookup_table,
286                                        const char *path)
287 {
288         /* "Reparse point data, including the tag and optional GUID,
289          * cannot exceed 16 kilobytes." - MSDN  */
290         char reparse_point_buf[16 * 1024];
291         DWORD bytesReturned;
292
293         if (!DeviceIoControl(hFile, FSCTL_GET_REPARSE_POINT,
294                              NULL, 0, reparse_point_buf,
295                              sizeof(reparse_point_buf), &bytesReturned, NULL))
296         {
297                 DWORD err = GetLastError();
298                 ERROR("Win32 API: Failed to get reparse data of \"%s\"", path);
299                 win32_error(err);
300                 return WIMLIB_ERR_READ;
301         }
302         if (bytesReturned < 8) {
303                 ERROR("Reparse data on \"%s\" is invalid", path);
304                 return WIMLIB_ERR_READ;
305         }
306         inode->i_reparse_tag = *(u32*)reparse_point_buf;
307         return inode_add_ads_with_data(inode, "",
308                                        (const u8*)reparse_point_buf + 8,
309                                        bytesReturned - 8, lookup_table);
310 }
311
312 /* Calculate the SHA1 message digest of a Win32 data stream, which may be either
313  * an unnamed or named data stream.
314  *
315  * @path:       Path to the file, with the stream noted at the end for named
316  *              streams.  UTF-16LE encoding.
317  *
318  * @hash:       On success, the SHA1 message digest of the stream is written to
319  *              this location.
320  *
321  * Returns 0 on success; nonzero on failure.
322  */
323 static int win32_sha1sum(const wchar_t *path, u8 hash[SHA1_HASH_SIZE])
324 {
325         HANDLE hFile;
326         SHA_CTX ctx;
327         u8 buf[32768];
328         DWORD bytesRead;
329         int ret;
330
331         hFile = win32_open_file_readonly(path);
332         if (hFile == INVALID_HANDLE_VALUE)
333                 return WIMLIB_ERR_OPEN;
334
335         sha1_init(&ctx);
336         for (;;) {
337                 if (!ReadFile(hFile, buf, sizeof(buf), &bytesRead, NULL)) {
338                         ret = WIMLIB_ERR_READ;
339                         goto out_close_handle;
340                 }
341                 if (bytesRead == 0)
342                         break;
343                 sha1_update(&ctx, buf, bytesRead);
344         }
345         ret = 0;
346         sha1_final(hash, &ctx);
347 out_close_handle:
348         CloseHandle(hFile);
349         return ret;
350 }
351
352 /* Scans an unnamed or named stream of a Win32 file (not a reparse point
353  * stream); calculates its SHA1 message digest and either creates a `struct
354  * wim_lookup_table_entry' in memory for it, or uses an existing 'struct
355  * wim_lookup_table_entry' for an identical stream.
356  *
357  * @path_utf16:         Path to the file (UTF-16LE).
358  *
359  * @path_utf16_nchars:  Number of 2-byte characters in @path_utf16.
360  *
361  * @inode:              WIM inode to save the stream into.
362  *
363  * @lookup_table:       Stream lookup table for the WIM.
364  *
365  * @dat:                A `WIN32_FIND_STREAM_DATA' structure that specifies the
366  *                      stream name.
367  *
368  * Returns 0 on success; nonzero on failure.
369  */
370 static int win32_capture_stream(const wchar_t *path_utf16,
371                                 size_t path_utf16_nchars,
372                                 struct wim_inode *inode,
373                                 struct wim_lookup_table *lookup_table,
374                                 WIN32_FIND_STREAM_DATA *dat)
375 {
376         struct wim_ads_entry *ads_entry;
377         u8 hash[SHA1_HASH_SIZE];
378         struct wim_lookup_table_entry *lte;
379         int ret;
380         wchar_t *p, *colon;
381         bool is_named_stream;
382         wchar_t *spath;
383         size_t spath_nchars;
384         DWORD err;
385
386         /* The stream name should be returned as :NAME:TYPE */
387         p = dat->cStreamName;
388         if (*p != L':')
389                 goto out_invalid_stream_name;
390         p += 1;
391         colon = wcschr(p, L':');
392         if (colon == NULL)
393                 goto out_invalid_stream_name;
394
395         if (wcscmp(colon + 1, L"$DATA")) {
396                 /* Not a DATA stream */
397                 ret = 0;
398                 goto out;
399         }
400
401         is_named_stream = (p != colon);
402         if (is_named_stream) {
403                 /* Allocate an ADS entry for the named stream. */
404                 char *utf8_stream_name;
405                 size_t utf8_stream_name_len;
406                 ret = utf16_to_utf8((const char *)p,
407                                     (colon - p) * sizeof(wchar_t),
408                                     &utf8_stream_name,
409                                     &utf8_stream_name_len);
410                 if (ret)
411                         goto out;
412                 ads_entry = inode_add_ads(inode, utf8_stream_name);
413                 FREE(utf8_stream_name);
414                 if (!ads_entry) {
415                         ret = WIMLIB_ERR_NOMEM;
416                         goto out;
417                 }
418         }
419
420         /* Create a UTF-16 string @spath that gives the filename, then a colon,
421          * then the stream name.  Or, if it's an unnamed stream, just the
422          * filename.  It is MALLOC()'ed so that it can be saved in the
423          * wim_lookup_table_entry if needed. */
424         *colon = '\0';
425         spath_nchars = path_utf16_nchars;
426         if (is_named_stream)
427                 spath_nchars += colon - p + 1;
428
429         spath = MALLOC((spath_nchars + 1) * sizeof(wchar_t));
430         memcpy(spath, path_utf16, path_utf16_nchars * sizeof(wchar_t));
431         if (is_named_stream) {
432                 spath[path_utf16_nchars] = L':';
433                 memcpy(&spath[path_utf16_nchars + 1], p, (colon - p) * sizeof(wchar_t));
434         }
435         spath[spath_nchars] = L'\0';
436
437         ret = win32_sha1sum(spath, hash);
438         if (ret) {
439                 err = GetLastError();
440                 ERROR("Win32 API: Failed to read \"%ls\" to calculate SHA1sum",
441                       path_utf16);
442                 win32_error(err);
443                 goto out_free_spath;
444         }
445
446         lte = __lookup_resource(lookup_table, hash);
447         if (lte) {
448                 /* Use existing wim_lookup_table_entry that has the same SHA1
449                  * message digest */
450                 lte->refcnt++;
451         } else {
452                 /* Make a new wim_lookup_table_entry */
453                 lte = new_lookup_table_entry();
454                 if (!lte) {
455                         ret = WIMLIB_ERR_NOMEM;
456                         goto out_free_spath;
457                 }
458                 lte->file_on_disk = (char*)spath;
459                 spath = NULL;
460                 lte->resource_location = RESOURCE_WIN32;
461                 lte->resource_entry.original_size = (uint64_t)dat->StreamSize.QuadPart;
462                 lte->resource_entry.size = (uint64_t)dat->StreamSize.QuadPart;
463                 copy_hash(lte->hash, hash);
464                 lookup_table_insert(lookup_table, lte);
465         }
466         if (is_named_stream)
467                 ads_entry->lte = lte;
468         else
469                 inode->i_lte = lte;
470 out_free_spath:
471         FREE(spath);
472 out:
473         return ret;
474 out_invalid_stream_name:
475         ERROR("Invalid stream name: \"%ls:%ls\"", path_utf16, dat->cStreamName);
476         ret = WIMLIB_ERR_READ;
477         goto out;
478 }
479
480 /* Scans a Win32 file for unnamed and named data streams (not reparse point
481  * streams).
482  *
483  * @path_utf16:         Path to the file (UTF-16LE).
484  *
485  * @path_utf16_nchars:  Number of 2-byte characters in @path_utf16.
486  *
487  * @inode:              WIM inode to save the stream into.
488  *
489  * @lookup_table:       Stream lookup table for the WIM.
490  *
491  * Returns 0 on success; nonzero on failure.
492  */
493 static int win32_capture_streams(const wchar_t *path_utf16,
494                                  size_t path_utf16_nchars,
495                                  struct wim_inode *inode,
496                                  struct wim_lookup_table *lookup_table)
497 {
498         WIN32_FIND_STREAM_DATA dat;
499         int ret;
500         HANDLE hFind;
501         DWORD err;
502
503         hFind = FindFirstStreamW(path_utf16, FindStreamInfoStandard, &dat, 0);
504         if (hFind == INVALID_HANDLE_VALUE) {
505                 err = GetLastError();
506
507                 /* Seems legal for this to return ERROR_HANDLE_EOF on reparse
508                  * points and directories */
509                 if ((inode->i_attributes &
510                     (FILE_ATTRIBUTE_REPARSE_POINT | FILE_ATTRIBUTE_DIRECTORY))
511                     && err == ERROR_HANDLE_EOF)
512                 {
513                         return 0;
514                 } else {
515                         ERROR("Win32 API: Failed to look up data streams of \"%ls\"",
516                               path_utf16);
517                         win32_error(err);
518                         return WIMLIB_ERR_READ;
519                 }
520         }
521         do {
522                 ret = win32_capture_stream(path_utf16,
523                                            path_utf16_nchars,
524                                            inode, lookup_table,
525                                            &dat);
526                 if (ret)
527                         goto out_find_close;
528         } while (FindNextStreamW(hFind, &dat));
529         err = GetLastError();
530         if (err != ERROR_HANDLE_EOF) {
531                 ERROR("Win32 API: Error reading data streams from \"%ls\"", path_utf16);
532                 win32_error(err);
533                 ret = WIMLIB_ERR_READ;
534         }
535 out_find_close:
536         FindClose(hFind);
537         return ret;
538 }
539
540 #endif
541
542 /*
543  * build_dentry_tree():
544  *      Recursively builds a tree of WIM dentries from an on-disk directory
545  *      tree.
546  *
547  * @root_ret:   Place to return a pointer to the root of the dentry tree.  Only
548  *              modified if successful.  Set to NULL if the file or directory was
549  *              excluded from capture.
550  *
551  * @root_disk_path:  The path to the root of the directory tree on disk (UTF-8).
552  *
553  * @lookup_table: The lookup table for the WIM file.  For each file added to the
554  *              dentry tree being built, an entry is added to the lookup table,
555  *              unless an identical stream is already in the lookup table.
556  *              These lookup table entries that are added point to the path of
557  *              the file on disk.
558  *
559  * @sd:         Ignored.  (Security data only captured in NTFS mode.)
560  *
561  * @config:
562  *              Configuration for files to be excluded from capture.
563  *
564  * @add_image_flags:  Bitwise OR of WIMLIB_ADD_IMAGE_FLAG_* values.
565  *
566  * @extra_arg:  Ignored in UNIX builds; used to pass sd_set pointer in Windows
567  *              builds.
568  *
569  * @return:     0 on success, nonzero on failure.  It is a failure if any of
570  *              the files cannot be `stat'ed, or if any of the needed
571  *              directories cannot be opened or read.  Failure to add the files
572  *              to the WIM may still occur later when trying to actually read
573  *              the on-disk files during a call to wimlib_write() or
574  *              wimlib_overwrite().
575  */
576 static int build_dentry_tree(struct wim_dentry **root_ret,
577                              const char *root_disk_path,
578                              struct wim_lookup_table *lookup_table,
579                              struct wim_security_data *sd,
580                              const struct capture_config *config,
581                              int add_image_flags,
582                              wimlib_progress_func_t progress_func,
583                              void *extra_arg)
584 {
585         struct wim_dentry *root = NULL;
586         int ret = 0;
587         struct wim_inode *inode;
588
589         if (exclude_path(root_disk_path, config, true)) {
590                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) {
591                         ERROR("Cannot exclude the root directory from capture");
592                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
593                         goto out;
594                 }
595                 if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
596                     && progress_func)
597                 {
598                         union wimlib_progress_info info;
599                         info.scan.cur_path = root_disk_path;
600                         info.scan.excluded = true;
601                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
602                 }
603                 goto out;
604         }
605
606         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
607             && progress_func)
608         {
609                 union wimlib_progress_info info;
610                 info.scan.cur_path = root_disk_path;
611                 info.scan.excluded = false;
612                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
613         }
614
615 #if !defined(__CYGWIN__) && !defined(__WIN32__)
616         /* UNIX version of capturing a directory tree */
617         struct stat root_stbuf;
618         int (*stat_fn)(const char *restrict, struct stat *restrict);
619         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE)
620                 stat_fn = stat;
621         else
622                 stat_fn = lstat;
623
624         ret = (*stat_fn)(root_disk_path, &root_stbuf);
625         if (ret != 0) {
626                 ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
627                 goto out;
628         }
629
630         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) &&
631               !S_ISDIR(root_stbuf.st_mode))
632         {
633                 /* Do a dereference-stat in case the root is a symbolic link.
634                  * This case is allowed, provided that the symbolic link points
635                  * to a directory. */
636                 ret = stat(root_disk_path, &root_stbuf);
637                 if (ret != 0) {
638                         ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
639                         ret = WIMLIB_ERR_STAT;
640                         goto out;
641                 }
642                 if (!S_ISDIR(root_stbuf.st_mode)) {
643                         ERROR("`%s' is not a directory", root_disk_path);
644                         ret = WIMLIB_ERR_NOTDIR;
645                         goto out;
646                 }
647         }
648         if (!S_ISREG(root_stbuf.st_mode) && !S_ISDIR(root_stbuf.st_mode)
649             && !S_ISLNK(root_stbuf.st_mode)) {
650                 ERROR("`%s' is not a regular file, directory, or symbolic link.",
651                       root_disk_path);
652                 ret = WIMLIB_ERR_SPECIAL_FILE;
653                 goto out;
654         }
655
656         root = new_dentry_with_timeless_inode(path_basename(root_disk_path));
657         if (!root) {
658                 if (errno == EILSEQ)
659                         ret = WIMLIB_ERR_INVALID_UTF8_STRING;
660                 else if (errno == ENOMEM)
661                         ret = WIMLIB_ERR_NOMEM;
662                 else
663                         ret = WIMLIB_ERR_ICONV_NOT_AVAILABLE;
664                 goto out;
665         }
666
667         inode = root->d_inode;
668
669 #ifdef HAVE_STAT_NANOSECOND_PRECISION
670         inode->i_creation_time = timespec_to_wim_timestamp(&root_stbuf.st_mtim);
671         inode->i_last_write_time = timespec_to_wim_timestamp(&root_stbuf.st_mtim);
672         inode->i_last_access_time = timespec_to_wim_timestamp(&root_stbuf.st_atim);
673 #else
674         inode->i_creation_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
675         inode->i_last_write_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
676         inode->i_last_access_time = unix_timestamp_to_wim(root_stbuf.st_atime);
677 #endif
678         /* Leave the inode number at 0 for directories. */
679         if (!S_ISDIR(root_stbuf.st_mode)) {
680                 if (sizeof(ino_t) >= 8)
681                         inode->i_ino = (u64)root_stbuf.st_ino;
682                 else
683                         inode->i_ino = (u64)root_stbuf.st_ino |
684                                            ((u64)root_stbuf.st_dev <<
685                                                 ((sizeof(ino_t) * 8) & 63));
686         }
687         inode->i_resolved = 1;
688         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
689                 ret = inode_set_unix_data(inode, root_stbuf.st_uid,
690                                           root_stbuf.st_gid,
691                                           root_stbuf.st_mode,
692                                           lookup_table,
693                                           UNIX_DATA_ALL | UNIX_DATA_CREATE);
694                 if (ret)
695                         goto out;
696         }
697         add_image_flags &= ~(WIMLIB_ADD_IMAGE_FLAG_ROOT | WIMLIB_ADD_IMAGE_FLAG_SOURCE);
698         if (S_ISREG(root_stbuf.st_mode)) { /* Archiving a regular file */
699
700                 struct wim_lookup_table_entry *lte;
701                 u8 hash[SHA1_HASH_SIZE];
702
703                 inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
704
705                 /* Empty files do not have to have a lookup table entry. */
706                 if (root_stbuf.st_size == 0)
707                         goto out;
708
709                 /* For each regular file, we must check to see if the file is in
710                  * the lookup table already; if it is, we increment its refcnt;
711                  * otherwise, we create a new lookup table entry and insert it.
712                  * */
713
714                 ret = sha1sum(root_disk_path, hash);
715                 if (ret != 0)
716                         goto out;
717
718                 lte = __lookup_resource(lookup_table, hash);
719                 if (lte) {
720                         lte->refcnt++;
721                         DEBUG("Add lte reference %u for `%s'", lte->refcnt,
722                               root_disk_path);
723                 } else {
724                         char *file_on_disk = STRDUP(root_disk_path);
725                         if (!file_on_disk) {
726                                 ERROR("Failed to allocate memory for file path");
727                                 ret = WIMLIB_ERR_NOMEM;
728                                 goto out;
729                         }
730                         lte = new_lookup_table_entry();
731                         if (!lte) {
732                                 FREE(file_on_disk);
733                                 ret = WIMLIB_ERR_NOMEM;
734                                 goto out;
735                         }
736                         lte->file_on_disk = file_on_disk;
737                         lte->resource_location = RESOURCE_IN_FILE_ON_DISK;
738                         lte->resource_entry.original_size = root_stbuf.st_size;
739                         lte->resource_entry.size = root_stbuf.st_size;
740                         copy_hash(lte->hash, hash);
741                         lookup_table_insert(lookup_table, lte);
742                 }
743                 root->d_inode->i_lte = lte;
744         } else if (S_ISDIR(root_stbuf.st_mode)) { /* Archiving a directory */
745
746                 inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
747
748                 DIR *dir;
749                 struct dirent entry, *result;
750                 struct wim_dentry *child;
751
752                 dir = opendir(root_disk_path);
753                 if (!dir) {
754                         ERROR_WITH_ERRNO("Failed to open the directory `%s'",
755                                          root_disk_path);
756                         ret = WIMLIB_ERR_OPEN;
757                         goto out;
758                 }
759
760                 /* Buffer for names of files in directory. */
761                 size_t len = strlen(root_disk_path);
762                 char name[len + 1 + FILENAME_MAX + 1];
763                 memcpy(name, root_disk_path, len);
764                 name[len] = '/';
765
766                 /* Create a dentry for each entry in the directory on disk, and recurse
767                  * to any subdirectories. */
768                 while (1) {
769                         errno = 0;
770                         ret = readdir_r(dir, &entry, &result);
771                         if (ret != 0) {
772                                 ret = WIMLIB_ERR_READ;
773                                 ERROR_WITH_ERRNO("Error reading the "
774                                                  "directory `%s'",
775                                                  root_disk_path);
776                                 break;
777                         }
778                         if (result == NULL)
779                                 break;
780                         if (result->d_name[0] == '.' && (result->d_name[1] == '\0'
781                               || (result->d_name[1] == '.' && result->d_name[2] == '\0')))
782                                         continue;
783                         strcpy(name + len + 1, result->d_name);
784                         ret = build_dentry_tree(&child, name, lookup_table,
785                                                 NULL, config, add_image_flags,
786                                                 progress_func, NULL);
787                         if (ret != 0)
788                                 break;
789                         if (child)
790                                 dentry_add_child(root, child);
791                 }
792                 closedir(dir);
793         } else { /* Archiving a symbolic link */
794                 inode->i_attributes = FILE_ATTRIBUTE_REPARSE_POINT;
795                 inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK;
796
797                 /* The idea here is to call readlink() to get the UNIX target of
798                  * the symbolic link, then turn the target into a reparse point
799                  * data buffer that contains a relative or absolute symbolic
800                  * link (NOT a junction point or *full* path symbolic link with
801                  * drive letter).
802                  */
803
804                 char deref_name_buf[4096];
805                 ssize_t deref_name_len;
806
807                 deref_name_len = readlink(root_disk_path, deref_name_buf,
808                                           sizeof(deref_name_buf) - 1);
809                 if (deref_name_len >= 0) {
810                         deref_name_buf[deref_name_len] = '\0';
811                         DEBUG("Read symlink `%s'", deref_name_buf);
812                         ret = inode_set_symlink(root->d_inode, deref_name_buf,
813                                                 lookup_table, NULL);
814                         if (ret == 0) {
815                                 /*
816                                  * Unfortunately, Windows seems to have the
817                                  * concept of "file" symbolic links as being
818                                  * different from "directory" symbolic links...
819                                  * so FILE_ATTRIBUTE_DIRECTORY needs to be set
820                                  * on the symbolic link if the *target* of the
821                                  * symbolic link is a directory.
822                                  */
823                                 struct stat stbuf;
824                                 if (stat(root_disk_path, &stbuf) == 0 &&
825                                     S_ISDIR(stbuf.st_mode))
826                                 {
827                                         inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
828                                 }
829                         }
830                 } else {
831                         ERROR_WITH_ERRNO("Failed to read target of "
832                                          "symbolic link `%s'", root_disk_path);
833                         ret = WIMLIB_ERR_READLINK;
834                 }
835         }
836 #else
837         /* Win32 version of capturing a directory tree */
838
839         wchar_t *path_utf16;
840         size_t path_utf16_nchars;
841         struct sd_set *sd_set;
842         DWORD err;
843
844         if (extra_arg == NULL) {
845                 sd_set = alloca(sizeof(struct sd_set));
846                 sd_set->rb_root.rb_node = NULL,
847                 sd_set->sd = sd;
848         } else {
849                 sd_set = extra_arg;
850         }
851
852         ret = utf8_to_utf16(root_disk_path, strlen(root_disk_path),
853                             (char**)&path_utf16, &path_utf16_nchars);
854         if (ret)
855                 goto out_destroy_sd_set;
856         path_utf16_nchars /= sizeof(wchar_t);
857
858         HANDLE hFile = win32_open_file_readonly(path_utf16);
859         if (hFile == INVALID_HANDLE_VALUE) {
860                 err = GetLastError();
861                 ERROR("Win32 API: Failed to open \"%s\"", root_disk_path);
862                 win32_error(err);
863                 ret = WIMLIB_ERR_OPEN;
864                 goto out_free_path_utf16;
865         }
866
867         BY_HANDLE_FILE_INFORMATION file_info;
868         if (!GetFileInformationByHandle(hFile, &file_info)) {
869                 err = GetLastError();
870                 ERROR("Win32 API: Failed to get file information for \"%s\"",
871                       root_disk_path);
872                 win32_error(err);
873                 ret = WIMLIB_ERR_STAT;
874                 goto out_close_handle;
875         }
876
877         /* Create a WIM dentry */
878         root = new_dentry_with_timeless_inode(path_basename(root_disk_path));
879         if (!root) {
880                 if (errno == EILSEQ)
881                         ret = WIMLIB_ERR_INVALID_UTF8_STRING;
882                 else if (errno == ENOMEM)
883                         ret = WIMLIB_ERR_NOMEM;
884                 else
885                         ret = WIMLIB_ERR_ICONV_NOT_AVAILABLE;
886                 goto out_close_handle;
887         }
888
889         /* Start preparing the associated WIM inode */
890         inode = root->d_inode;
891
892         inode->i_attributes = file_info.dwFileAttributes;
893         inode->i_creation_time = FILETIME_to_u64(&file_info.ftCreationTime);
894         inode->i_last_write_time = FILETIME_to_u64(&file_info.ftLastWriteTime);
895         inode->i_last_access_time = FILETIME_to_u64(&file_info.ftLastAccessTime);
896         inode->i_ino = ((u64)file_info.nFileIndexHigh << 32) |
897                         (u64)file_info.nFileIndexLow;
898
899         inode->i_resolved = 1;
900         add_image_flags &= ~(WIMLIB_ADD_IMAGE_FLAG_ROOT | WIMLIB_ADD_IMAGE_FLAG_SOURCE);
901
902         /* Get DOS name and security descriptor (if any). */
903         ret = win32_get_short_name(root, path_utf16);
904         if (ret)
905                 goto out_close_handle;
906         ret = win32_get_security_descriptor(root, sd_set, path_utf16);
907         if (ret)
908                 goto out_close_handle;
909
910         if (inode_is_directory(inode)) {
911                 /* Directory (not a reparse point) --- recurse to children */
912
913                 /* But first... directories may have alternate data streams that
914                  * need to be captured. */
915                 ret = win32_capture_streams(path_utf16,
916                                             path_utf16_nchars,
917                                             inode,
918                                             lookup_table);
919                 if (ret)
920                         goto out_close_handle;
921                 ret = win32_recurse_directory(root,
922                                               root_disk_path,
923                                               lookup_table,
924                                               sd,
925                                               config,
926                                               add_image_flags,
927                                               progress_func,
928                                               sd_set,
929                                               path_utf16,
930                                               path_utf16_nchars);
931         } else if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
932                 /* Reparse point: save the reparse tag and data */
933                 ret = win32_capture_reparse_point(hFile,
934                                                   inode,
935                                                   lookup_table,
936                                                   root_disk_path);
937         } else {
938                 /* Not a directory, not a reparse point; capture the default
939                  * file contents and any alternate data streams. */
940                 ret = win32_capture_streams(path_utf16,
941                                             path_utf16_nchars,
942                                             inode,
943                                             lookup_table);
944         }
945 out_close_handle:
946         CloseHandle(hFile);
947 out_free_path_utf16:
948         FREE(path_utf16);
949 out_destroy_sd_set:
950         if (extra_arg == NULL)
951                 destroy_sd_set(sd_set);
952 #endif
953         /* The below lines of code are common to both UNIX and Win32 builds.  It
954          * simply returns the captured directory tree if the capture was
955          * successful, or frees it if the capture was unsuccessful. */
956 out:
957         if (ret == 0)
958                 *root_ret = root;
959         else
960                 free_dentry_tree(root, lookup_table);
961         return ret;
962 }
963
/* Identifies which section of the capture configuration file the parser is
 * currently inside, and therefore which pattern list a line belongs to. */
enum pattern_type {
        NONE                       = 0, /* Not inside any section yet */
        EXCLUSION_LIST             = 1, /* [ExclusionList] */
        EXCLUSION_EXCEPTION        = 2, /* [ExclusionException] */
        COMPRESSION_EXCLUSION_LIST = 3, /* [CompressionExclusionList] */
        ALIGNMENT_LIST             = 4, /* [AlignmentList] */
};
971
#define COMPAT_DEFAULT_CONFIG

/* Capture configuration used when the caller does not specify one.
 *
 * XXX: This policy is being moved to library users.  The next
 * ABI-incompatible library version will default to the empty string here;
 * until then COMPAT_DEFAULT_CONFIG selects the historical built-in lists. */
#ifdef COMPAT_DEFAULT_CONFIG
static const char *default_config =
        "[ExclusionList]\n"
        "\\$ntfs.log\n"
        "\\hiberfil.sys\n"
        "\\pagefile.sys\n"
        "\\System Volume Information\n"
        "\\RECYCLER\n"
        "\\Windows\\CSC\n"
        "\n"
        "[CompressionExclusionList]\n"
        "*.mp3\n"
        "*.zip\n"
        "*.cab\n"
        "\\WINDOWS\\inf\\*.pnf\n";
#else
static const char *default_config = "";
#endif
995
996 static void destroy_pattern_list(struct pattern_list *list)
997 {
998         FREE(list->pats);
999 }
1000
1001 static void destroy_capture_config(struct capture_config *config)
1002 {
1003         destroy_pattern_list(&config->exclusion_list);
1004         destroy_pattern_list(&config->exclusion_exception);
1005         destroy_pattern_list(&config->compression_exclusion_list);
1006         destroy_pattern_list(&config->alignment_list);
1007         FREE(config->config_str);
1008         FREE(config->prefix);
1009         memset(config, 0, sizeof(*config));
1010 }
1011
1012 static int pattern_list_add_pattern(struct pattern_list *list,
1013                                     const char *pattern)
1014 {
1015         const char **pats;
1016         if (list->num_pats >= list->num_allocated_pats) {
1017                 pats = REALLOC(list->pats,
1018                                sizeof(list->pats[0]) * (list->num_allocated_pats + 8));
1019                 if (!pats)
1020                         return WIMLIB_ERR_NOMEM;
1021                 list->num_allocated_pats += 8;
1022                 list->pats = pats;
1023         }
1024         list->pats[list->num_pats++] = pattern;
1025         return 0;
1026 }
1027
1028 /* Parses the contents of the image capture configuration file and fills in a
1029  * `struct capture_config'. */
1030 static int init_capture_config(struct capture_config *config,
1031                                const char *_config_str, size_t config_len)
1032 {
1033         char *config_str;
1034         char *p;
1035         char *eol;
1036         char *next_p;
1037         size_t bytes_remaining;
1038         enum pattern_type type = NONE;
1039         int ret;
1040         unsigned long line_no = 0;
1041
1042         DEBUG("config_len = %zu", config_len);
1043         bytes_remaining = config_len;
1044         memset(config, 0, sizeof(*config));
1045         config_str = MALLOC(config_len);
1046         if (!config_str) {
1047                 ERROR("Could not duplicate capture config string");
1048                 return WIMLIB_ERR_NOMEM;
1049         }
1050
1051         memcpy(config_str, _config_str, config_len);
1052         next_p = config_str;
1053         config->config_str = config_str;
1054         while (bytes_remaining) {
1055                 line_no++;
1056                 p = next_p;
1057                 eol = memchr(p, '\n', bytes_remaining);
1058                 if (!eol) {
1059                         ERROR("Expected end-of-line in capture config file on "
1060                               "line %lu", line_no);
1061                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
1062                         goto out_destroy;
1063                 }
1064
1065                 next_p = eol + 1;
1066                 bytes_remaining -= (next_p - p);
1067                 if (eol == p)
1068                         continue;
1069
1070                 if (*(eol - 1) == '\r')
1071                         eol--;
1072                 *eol = '\0';
1073
1074                 /* Translate backslash to forward slash */
1075                 for (char *pp = p; pp != eol; pp++)
1076                         if (*pp == '\\')
1077                                 *pp = '/';
1078
1079                 /* Remove drive letter */
1080                 if (eol - p > 2 && isalpha(*p) && *(p + 1) == ':')
1081                         p += 2;
1082
1083                 ret = 0;
1084                 if (strcmp(p, "[ExclusionList]") == 0)
1085                         type = EXCLUSION_LIST;
1086                 else if (strcmp(p, "[ExclusionException]") == 0)
1087                         type = EXCLUSION_EXCEPTION;
1088                 else if (strcmp(p, "[CompressionExclusionList]") == 0)
1089                         type = COMPRESSION_EXCLUSION_LIST;
1090                 else if (strcmp(p, "[AlignmentList]") == 0)
1091                         type = ALIGNMENT_LIST;
1092                 else if (p[0] == '[' && strrchr(p, ']')) {
1093                         ERROR("Unknown capture configuration section `%s'", p);
1094                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
1095                 } else switch (type) {
1096                 case EXCLUSION_LIST:
1097                         DEBUG("Adding pattern \"%s\" to exclusion list", p);
1098                         ret = pattern_list_add_pattern(&config->exclusion_list, p);
1099                         break;
1100                 case EXCLUSION_EXCEPTION:
1101                         DEBUG("Adding pattern \"%s\" to exclusion exception list", p);
1102                         ret = pattern_list_add_pattern(&config->exclusion_exception, p);
1103                         break;
1104                 case COMPRESSION_EXCLUSION_LIST:
1105                         DEBUG("Adding pattern \"%s\" to compression exclusion list", p);
1106                         ret = pattern_list_add_pattern(&config->compression_exclusion_list, p);
1107                         break;
1108                 case ALIGNMENT_LIST:
1109                         DEBUG("Adding pattern \"%s\" to alignment list", p);
1110                         ret = pattern_list_add_pattern(&config->alignment_list, p);
1111                         break;
1112                 default:
1113                         ERROR("Line %lu of capture configuration is not "
1114                               "in a block (such as [ExclusionList])",
1115                               line_no);
1116                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
1117                         break;
1118                 }
1119                 if (ret != 0)
1120                         goto out_destroy;
1121         }
1122         return 0;
1123 out_destroy:
1124         destroy_capture_config(config);
1125         return ret;
1126 }
1127
1128 static int capture_config_set_prefix(struct capture_config *config,
1129                                      const char *_prefix)
1130 {
1131         char *prefix = STRDUP(_prefix);
1132
1133         if (!prefix)
1134                 return WIMLIB_ERR_NOMEM;
1135         FREE(config->prefix);
1136         config->prefix = prefix;
1137         config->prefix_len = strlen(prefix);
1138         return 0;
1139 }
1140
1141 static bool match_pattern(const char *path, const char *path_basename,
1142                           const struct pattern_list *list)
1143 {
1144         for (size_t i = 0; i < list->num_pats; i++) {
1145                 const char *pat = list->pats[i];
1146                 const char *string;
1147                 if (pat[0] == '/')
1148                         /* Absolute path from root of capture */
1149                         string = path;
1150                 else {
1151                         if (strchr(pat, '/'))
1152                                 /* Relative path from root of capture */
1153                                 string = path + 1;
1154                         else
1155                                 /* A file name pattern */
1156                                 string = path_basename;
1157                 }
1158                 if (fnmatch(pat, string, FNM_PATHNAME
1159                         #ifdef FNM_CASEFOLD
1160                                         | FNM_CASEFOLD
1161                         #endif
1162                         ) == 0)
1163                 {
1164                         DEBUG("`%s' matches the pattern \"%s\"",
1165                               string, pat);
1166                         return true;
1167                 }
1168         }
1169         return false;
1170 }
1171
1172 /* Return true if the image capture configuration file indicates we should
1173  * exclude the filename @path from capture.
1174  *
1175  * If @exclude_prefix is %true, the part of the path up and including the name
1176  * of the directory being captured is not included in the path for matching
1177  * purposes.  This allows, for example, a pattern like /hiberfil.sys to match a
1178  * file /mnt/windows7/hiberfil.sys if we are capturing the /mnt/windows7
1179  * directory.
1180  */
1181 bool exclude_path(const char *path, const struct capture_config *config,
1182                   bool exclude_prefix)
1183 {
1184         const char *basename = path_basename(path);
1185         if (exclude_prefix) {
1186                 wimlib_assert(strlen(path) >= config->prefix_len);
1187                 if (memcmp(config->prefix, path, config->prefix_len) == 0
1188                      && path[config->prefix_len] == '/')
1189                         path += config->prefix_len;
1190         }
1191         return match_pattern(path, basename, &config->exclusion_list) &&
1192                 !match_pattern(path, basename, &config->exclusion_exception);
1193
1194 }
1195
/* Removes leading and trailing forward slashes from @target_path.
 *
 * Trailing slashes are overwritten with null bytes in place; leading slashes
 * are skipped by returning a pointer further into the same buffer.  A NULL
 * argument yields the empty string literal. */
static const char *canonicalize_target_path(char *target_path)
{
        char *end;

        if (!target_path)
                return "";

        /* Skip past any leading slashes. */
        while (*target_path == '/')
                target_path++;

        /* Nothing left: the path was empty or consisted only of slashes. */
        if (*target_path == '\0')
                return target_path;

        /* The first remaining character is not a slash, so this loop stops
         * before it can run off the front of the string. */
        end = target_path + strlen(target_path);
        while (end[-1] == '/')
                *--end = '\0';

        return target_path;
}
1217
1218 #if defined(__CYGWIN__) || defined(__WIN32__)
/* Replaces every backslash in the string @s with a forward slash, in place. */
static void zap_backslashes(char *s)
{
        for (char *bs = strchr(s, '\\'); bs != NULL; bs = strchr(bs + 1, '\\'))
                *bs = '/';
}
1227 #endif
1228
1229 /* Strip leading and trailing slashes from the target paths */
1230 static void canonicalize_targets(struct wimlib_capture_source *sources,
1231                                  size_t num_sources)
1232 {
1233         while (num_sources--) {
1234                 DEBUG("Canonicalizing { source: \"%s\", target=\"%s\"}",
1235                       sources->fs_source_path,
1236                       sources->wim_target_path);
1237 #if defined(__CYGWIN__) || defined(__WIN32__)
1238                 /* The Windows API can handle forward slashes.  Just get rid of
1239                  * backslashes to avoid confusing other parts of the library
1240                  * code. */
1241                 zap_backslashes(sources->fs_source_path);
1242                 if (sources->wim_target_path)
1243                         zap_backslashes(sources->wim_target_path);
1244 #endif
1245                 sources->wim_target_path =
1246                         (char*)canonicalize_target_path(sources->wim_target_path);
1247                 DEBUG("Canonical target: \"%s\"", sources->wim_target_path);
1248                 sources++;
1249         }
1250 }
1251
1252 static int capture_source_cmp(const void *p1, const void *p2)
1253 {
1254         const struct wimlib_capture_source *s1 = p1, *s2 = p2;
1255         return strcmp(s1->wim_target_path, s2->wim_target_path);
1256 }
1257
1258 /* Sorts the capture sources lexicographically by target path.  This occurs
1259  * after leading and trailing forward slashes are stripped.
1260  *
1261  * One purpose of this is to make sure that target paths that are inside other
1262  * target paths are added after the containing target paths. */
1263 static void sort_sources(struct wimlib_capture_source *sources,
1264                          size_t num_sources)
1265 {
1266         qsort(sources, num_sources, sizeof(sources[0]), capture_source_cmp);
1267 }
1268
1269 static int check_sorted_sources(struct wimlib_capture_source *sources,
1270                                 size_t num_sources, int add_image_flags)
1271 {
1272         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
1273                 if (num_sources != 1) {
1274                         ERROR("Must specify exactly 1 capture source "
1275                               "(the NTFS volume) in NTFS mode!");
1276                         return WIMLIB_ERR_INVALID_PARAM;
1277                 }
1278                 if (sources[0].wim_target_path[0] != '\0') {
1279                         ERROR("In NTFS capture mode the target path inside "
1280                               "the image must be the root directory!");
1281                         return WIMLIB_ERR_INVALID_PARAM;
1282                 }
1283         } else if (num_sources != 0) {
1284                 /* This code is disabled because the current code
1285                  * unconditionally attempts to do overlays.  So, duplicate
1286                  * target paths are OK. */
1287         #if 0
1288                 if (num_sources > 1 && sources[0].wim_target_path[0] == '\0') {
1289                         ERROR("Cannot specify root target when using multiple "
1290                               "capture sources!");
1291                         return WIMLIB_ERR_INVALID_PARAM;
1292                 }
1293                 for (size_t i = 0; i < num_sources - 1; i++) {
1294                         size_t len = strlen(sources[i].wim_target_path);
1295                         size_t j = i + 1;
1296                         const char *target1 = sources[i].wim_target_path;
1297                         do {
1298                                 const char *target2 = sources[j].wim_target_path;
1299                                 DEBUG("target1=%s, target2=%s",
1300                                       target1,target2);
1301                                 if (strncmp(target1, target2, len) ||
1302                                     target2[len] > '/')
1303                                         break;
1304                                 if (target2[len] == '/') {
1305                                         ERROR("Invalid target `%s': is a prefix of `%s'",
1306                                               target1, target2);
1307                                         return WIMLIB_ERR_INVALID_PARAM;
1308                                 }
1309                                 if (target2[len] == '\0') {
1310                                         ERROR("Invalid target `%s': is a duplicate of `%s'",
1311                                               target1, target2);
1312                                         return WIMLIB_ERR_INVALID_PARAM;
1313                                 }
1314                         } while (++j != num_sources);
1315                 }
1316         #endif
1317         }
1318         return 0;
1319
1320 }
1321
1322 /* Creates a new directory to place in the WIM image.  This is to create parent
1323  * directories that are not part of any target as needed.  */
1324 static struct wim_dentry *
1325 new_filler_directory(const char *name)
1326 {
1327         struct wim_dentry *dentry;
1328         DEBUG("Creating filler directory \"%s\"", name);
1329         dentry = new_dentry_with_inode(name);
1330         if (dentry) {
1331                 /* Leave the inode number as 0 for now.  The final inode number
1332                  * will be assigned later by assign_inode_numbers(). */
1333                 dentry->d_inode->i_resolved = 1;
1334                 dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
1335         }
1336         return dentry;
1337 }
1338
1339 /* Transfers the children of @branch to @target.  It is an error if @target is
1340  * not a directory or if both @branch and @target contain a child dentry with
1341  * the same name. */
1342 static int do_overlay(struct wim_dentry *target, struct wim_dentry *branch)
1343 {
1344         struct rb_root *rb_root;
1345
1346         DEBUG("Doing overlay %s => %s",
1347               branch->file_name_utf8, target->file_name_utf8);
1348
1349         if (!dentry_is_directory(target)) {
1350                 ERROR("Cannot overlay directory `%s' over non-directory",
1351                       branch->file_name_utf8);
1352                 return WIMLIB_ERR_INVALID_OVERLAY;
1353         }
1354
1355         rb_root = &branch->d_inode->i_children;
1356         while (rb_root->rb_node) { /* While @branch has children... */
1357                 struct wim_dentry *child = rbnode_dentry(rb_root->rb_node);
1358                 /* Move @child to the directory @target */
1359                 unlink_dentry(child);
1360                 if (!dentry_add_child(target, child)) {
1361                         /* Revert the change to avoid leaking the directory tree
1362                          * rooted at @child */
1363                         dentry_add_child(branch, child);
1364                         ERROR("Overlay error: file `%s' already exists "
1365                               "as a child of `%s'",
1366                               child->file_name_utf8, target->file_name_utf8);
1367                         return WIMLIB_ERR_INVALID_OVERLAY;
1368                 }
1369         }
1370         free_dentry(branch);
1371         return 0;
1372
1373 }
1374
1375 /* Attach or overlay a branch onto the WIM image.
1376  *
1377  * @root_p:
1378  *      Pointer to the root of the WIM image, or pointer to NULL if it has not
1379  *      been created yet.
1380  * @branch
1381  *      Branch to add.
1382  * @target_path:
1383  *      Path in the WIM image to add the branch, with leading and trailing
1384  *      slashes stripped.
1385  */
1386 static int attach_branch(struct wim_dentry **root_p,
1387                          struct wim_dentry *branch,
1388                          char *target_path)
1389 {
1390         char *slash;
1391         struct wim_dentry *dentry, *parent, *target;
1392
1393         DEBUG("Attaching branch \"%s\" => \"%s\"",
1394               branch->file_name_utf8, target_path);
1395
1396         if (*target_path == '\0') {
1397                 /* Target: root directory */
1398                 if (*root_p) {
1399                         /* Overlay on existing root */
1400                         return do_overlay(*root_p, branch);
1401                 } else  {
1402                         /* Set as root */
1403                         *root_p = branch;
1404                         return 0;
1405                 }
1406         }
1407
1408         /* Adding a non-root branch.  Create root if it hasn't been created
1409          * already. */
1410         if (!*root_p) {
1411                 *root_p = new_filler_directory("");
1412                 if (!*root_p)
1413                         return WIMLIB_ERR_NOMEM;
1414         }
1415
1416         /* Walk the path to the branch, creating filler directories as needed.
1417          * */
1418         parent = *root_p;
1419         while ((slash = strchr(target_path, '/'))) {
1420                 *slash = '\0';
1421                 dentry = get_dentry_child_with_name(parent, target_path);
1422                 if (!dentry) {
1423                         dentry = new_filler_directory(target_path);
1424                         if (!dentry)
1425                                 return WIMLIB_ERR_NOMEM;
1426                         dentry_add_child(parent, dentry);
1427                 }
1428                 parent = dentry;
1429                 target_path = slash;
1430                 /* Skip over slashes.  Note: this cannot overrun the length of
1431                  * the string because the last character cannot be a slash, as
1432                  * trailing slashes were tripped.  */
1433                 do {
1434                         ++target_path;
1435                 } while (*target_path == '/');
1436         }
1437
1438         /* If the target path already existed, overlay the branch onto it.
1439          * Otherwise, set the branch as the target path. */
1440         target = get_dentry_child_with_name(parent, branch->file_name_utf8);
1441         if (target) {
1442                 return do_overlay(target, branch);
1443         } else {
1444                 dentry_add_child(parent, branch);
1445                 return 0;
1446         }
1447 }
1448
1449 WIMLIBAPI int wimlib_add_image_multisource(WIMStruct *w,
1450                                            struct wimlib_capture_source *sources,
1451                                            size_t num_sources,
1452                                            const char *name,
1453                                            const char *config_str,
1454                                            size_t config_len,
1455                                            int add_image_flags,
1456                                            wimlib_progress_func_t progress_func)
1457 {
1458         int (*capture_tree)(struct wim_dentry **, const char *,
1459                             struct wim_lookup_table *,
1460                             struct wim_security_data *,
1461                             const struct capture_config *,
1462                             int, wimlib_progress_func_t, void *);
1463         void *extra_arg;
1464         struct wim_dentry *root_dentry;
1465         struct wim_dentry *branch;
1466         struct wim_security_data *sd;
1467         struct capture_config config;
1468         struct wim_image_metadata *imd;
1469         int ret;
1470
1471         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
1472 #ifdef WITH_NTFS_3G
1473                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
1474                         ERROR("Cannot dereference files when capturing directly from NTFS");
1475                         return WIMLIB_ERR_INVALID_PARAM;
1476                 }
1477                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
1478                         ERROR("Capturing UNIX owner and mode not supported "
1479                               "when capturing directly from NTFS");
1480                         return WIMLIB_ERR_INVALID_PARAM;
1481                 }
1482                 capture_tree = build_dentry_tree_ntfs;
1483                 extra_arg = &w->ntfs_vol;
1484 #else
1485                 ERROR("wimlib was compiled without support for NTFS-3g, so\n"
1486                       "        cannot capture a WIM image directly from a NTFS volume!");
1487                 return WIMLIB_ERR_UNSUPPORTED;
1488 #endif
1489         } else {
1490                 capture_tree = build_dentry_tree;
1491                 extra_arg = NULL;
1492         }
1493
1494 #if defined(__CYGWIN__) || defined(__WIN32__)
1495         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
1496                 ERROR("Capturing UNIX-specific data is not supported on Windows");
1497                 return WIMLIB_ERR_INVALID_PARAM;
1498         }
1499         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
1500                 ERROR("Dereferencing symbolic links is not supported on Windows");
1501                 return WIMLIB_ERR_INVALID_PARAM;
1502         }
1503 #endif
1504
1505         if (!name || !*name) {
1506                 ERROR("Must specify a non-empty string for the image name");
1507                 return WIMLIB_ERR_INVALID_PARAM;
1508         }
1509
1510         if (w->hdr.total_parts != 1) {
1511                 ERROR("Cannot add an image to a split WIM");
1512                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
1513         }
1514
1515         if (wimlib_image_name_in_use(w, name)) {
1516                 ERROR("There is already an image named \"%s\" in `%s'",
1517                       name, w->filename);
1518                 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
1519         }
1520
1521         if (!config_str) {
1522                 DEBUG("Using default capture configuration");
1523                 config_str = default_config;
1524                 config_len = strlen(default_config);
1525         }
1526         ret = init_capture_config(&config, config_str, config_len);
1527         if (ret)
1528                 goto out;
1529
1530         DEBUG("Allocating security data");
1531         sd = CALLOC(1, sizeof(struct wim_security_data));
1532         if (!sd) {
1533                 ret = WIMLIB_ERR_NOMEM;
1534                 goto out_destroy_capture_config;
1535         }
1536         sd->total_length = 8;
1537         sd->refcnt = 1;
1538
1539         DEBUG("Using %zu capture sources", num_sources);
1540         canonicalize_targets(sources, num_sources);
1541         sort_sources(sources, num_sources);
1542         ret = check_sorted_sources(sources, num_sources, add_image_flags);
1543         if (ret) {
1544                 ret = WIMLIB_ERR_INVALID_PARAM;
1545                 goto out_free_security_data;
1546         }
1547
1548         DEBUG("Building dentry tree.");
1549         if (num_sources == 0) {
1550                 root_dentry = new_filler_directory("");
1551                 if (!root_dentry) {
1552                         ret = WIMLIB_ERR_NOMEM;
1553                         goto out_free_security_data;
1554                 }
1555         } else {
1556                 size_t i;
1557
1558 #if defined(__CYGWIN__) || defined(__WIN32__)
1559                 win32_acquire_privilege(SE_BACKUP_NAME);
1560                 win32_acquire_privilege(SE_SECURITY_NAME);
1561                 win32_acquire_privilege(SE_TAKE_OWNERSHIP_NAME);
1562 #endif
1563                 root_dentry = NULL;
1564                 i = 0;
1565                 do {
1566                         int flags;
1567                         union wimlib_progress_info progress;
1568
1569                         DEBUG("Building dentry tree for source %zu of %zu "
1570                               "(\"%s\" => \"%s\")", i + 1, num_sources,
1571                               sources[i].fs_source_path,
1572                               sources[i].wim_target_path);
1573                         if (progress_func) {
1574                                 memset(&progress, 0, sizeof(progress));
1575                                 progress.scan.source = sources[i].fs_source_path;
1576                                 progress.scan.wim_target_path = sources[i].wim_target_path;
1577                                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_BEGIN, &progress);
1578                         }
1579                         ret = capture_config_set_prefix(&config,
1580                                                         sources[i].fs_source_path);
1581                         if (ret)
1582                                 goto out_free_dentry_tree;
1583                         flags = add_image_flags | WIMLIB_ADD_IMAGE_FLAG_SOURCE;
1584                         if (!*sources[i].wim_target_path)
1585                                 flags |= WIMLIB_ADD_IMAGE_FLAG_ROOT;
1586                         ret = (*capture_tree)(&branch, sources[i].fs_source_path,
1587                                               w->lookup_table, sd,
1588                                               &config,
1589                                               flags,
1590                                               progress_func, extra_arg);
1591                         if (ret) {
1592                                 ERROR("Failed to build dentry tree for `%s'",
1593                                       sources[i].fs_source_path);
1594                                 goto out_free_dentry_tree;
1595                         }
1596                         if (branch) {
1597                                 /* Use the target name, not the source name, for
1598                                  * the root of each branch from a capture
1599                                  * source.  (This will also set the root dentry
1600                                  * of the entire image to be unnamed.) */
1601                                 ret = set_dentry_name(branch,
1602                                                       path_basename(sources[i].wim_target_path));
1603                                 if (ret)
1604                                         goto out_free_branch;
1605
1606                                 ret = attach_branch(&root_dentry, branch,
1607                                                     sources[i].wim_target_path);
1608                                 if (ret)
1609                                         goto out_free_branch;
1610                         }
1611                         if (progress_func)
1612                                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_END, &progress);
1613                 } while (++i != num_sources);
1614         }
1615
1616         DEBUG("Calculating full paths of dentries.");
1617         ret = for_dentry_in_tree(root_dentry, calculate_dentry_full_path, NULL);
1618         if (ret != 0)
1619                 goto out_free_dentry_tree;
1620
1621         ret = add_new_dentry_tree(w, root_dentry, sd);
1622         if (ret != 0)
1623                 goto out_free_dentry_tree;
1624
1625         imd = &w->image_metadata[w->hdr.image_count - 1];
1626
1627         ret = dentry_tree_fix_inodes(root_dentry, &imd->inode_list);
1628         if (ret != 0)
1629                 goto out_destroy_imd;
1630
1631         DEBUG("Assigning hard link group IDs");
1632         assign_inode_numbers(&imd->inode_list);
1633
1634         ret = xml_add_image(w, name);
1635         if (ret != 0)
1636                 goto out_destroy_imd;
1637
1638         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_BOOT)
1639                 wimlib_set_boot_idx(w, w->hdr.image_count);
1640         ret = 0;
1641         goto out_destroy_capture_config;
1642 out_destroy_imd:
1643         destroy_image_metadata(&w->image_metadata[w->hdr.image_count - 1],
1644                                w->lookup_table);
1645         w->hdr.image_count--;
1646         goto out;
1647 out_free_branch:
1648         free_dentry_tree(branch, w->lookup_table);
1649 out_free_dentry_tree:
1650         free_dentry_tree(root_dentry, w->lookup_table);
1651 out_free_security_data:
1652         free_security_data(sd);
1653 out_destroy_capture_config:
1654         destroy_capture_config(&config);
1655 out:
1656 #if defined(__CYGWIN__) || defined(__WIN32__)
1657         win32_release_privilege(SE_BACKUP_NAME);
1658         win32_release_privilege(SE_SECURITY_NAME);
1659         win32_release_privilege(SE_TAKE_OWNERSHIP_NAME);
1660 #endif
1661         return ret;
1662 }
1663
1664 WIMLIBAPI int wimlib_add_image(WIMStruct *w, const char *source,
1665                                const char *name, const char *config_str,
1666                                size_t config_len, int add_image_flags,
1667                                wimlib_progress_func_t progress_func)
1668 {
1669         if (!source || !*source)
1670                 return WIMLIB_ERR_INVALID_PARAM;
1671
1672         char *fs_source_path = STRDUP(source);
1673         int ret;
1674         struct wimlib_capture_source capture_src = {
1675                 .fs_source_path = fs_source_path,
1676                 .wim_target_path = NULL,
1677                 .reserved = 0,
1678         };
1679         ret = wimlib_add_image_multisource(w, &capture_src, 1, name,
1680                                            config_str, config_len,
1681                                            add_image_flags, progress_func);
1682         FREE(fs_source_path);
1683         return ret;
1684 }