7c9199c9dd73d83faf057cc5320537ab47538053
[wimlib] / src / add_image.c
1 /*
2  * add_image.c
3  */
4
5 /*
6  * Copyright (C) 2012, 2013 Eric Biggers
7  *
8  * This file is part of wimlib, a library for working with WIM files.
9  *
10  * wimlib is free software; you can redistribute it and/or modify it under the
11  * terms of the GNU General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option)
13  * any later version.
14  *
15  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17  * A PARTICULAR PURPOSE. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with wimlib; if not, see http://www.gnu.org/licenses/.
22  */
23
24 #include "config.h"
25
26 #ifdef __WIN32__
27 #  include "win32.h"
28 #else
29 #  include <dirent.h>
30 #  include <sys/stat.h>
31 #  include <fnmatch.h>
32 #  include "timestamp.h"
33 #endif
34
35 #include "wimlib_internal.h"
36 #include "dentry.h"
37 #include "lookup_table.h"
38 #include "xml.h"
39
40 #include <ctype.h>
41 #include <errno.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <unistd.h>
45
46 #ifdef HAVE_ALLOCA_H
47 #  include <alloca.h>
48 #endif
49
50 /*
51  * Adds the dentry tree and security data for a new image to the image metadata
52  * array of the WIMStruct.
53  */
54 int
55 add_new_dentry_tree(WIMStruct *w, struct wim_dentry *root_dentry,
56                     struct wim_security_data *sd)
57 {
58         struct wim_lookup_table_entry *metadata_lte;
59         struct wim_image_metadata *imd;
60         struct wim_image_metadata *new_imd;
61
62         wimlib_assert(root_dentry != NULL);
63
64         DEBUG("Reallocating image metadata array for image_count = %u",
65               w->hdr.image_count + 1);
66         imd = CALLOC((w->hdr.image_count + 1), sizeof(struct wim_image_metadata));
67
68         if (!imd) {
69                 ERROR("Failed to allocate memory for new image metadata array");
70                 goto err;
71         }
72
73         memcpy(imd, w->image_metadata,
74                w->hdr.image_count * sizeof(struct wim_image_metadata));
75
76         metadata_lte = new_lookup_table_entry();
77         if (!metadata_lte)
78                 goto err_free_imd;
79
80         metadata_lte->resource_entry.flags = WIM_RESHDR_FLAG_METADATA;
81         random_hash(metadata_lte->hash);
82         lookup_table_insert(w->lookup_table, metadata_lte);
83
84         new_imd = &imd[w->hdr.image_count];
85
86         new_imd->root_dentry    = root_dentry;
87         new_imd->metadata_lte   = metadata_lte;
88         new_imd->security_data  = sd;
89         new_imd->modified       = 1;
90
91         FREE(w->image_metadata);
92         w->image_metadata = imd;
93         w->hdr.image_count++;
94         return 0;
95 err_free_imd:
96         FREE(imd);
97 err:
98         return WIMLIB_ERR_NOMEM;
99
100 }
101
102 #ifndef __WIN32__
103 /*
104  * build_dentry_tree():
105  *      Recursively builds a tree of WIM dentries from an on-disk directory
106  *      tree.
107  *
108  * @root_ret:   Place to return a pointer to the root of the dentry tree.  Only
109  *              modified if successful.  Set to NULL if the file or directory was
110  *              excluded from capture.
111  *
112  * @root_disk_path:  The path to the root of the directory tree on disk.
113  *
114  * @lookup_table: The lookup table for the WIM file.  For each file added to the
115  *              dentry tree being built, an entry is added to the lookup table,
116  *              unless an identical stream is already in the lookup table.
117  *              These lookup table entries that are added point to the path of
118  *              the file on disk.
119  *
120  * @sd:         Ignored.  (Security data only captured in NTFS mode.)
121  *
122  * @capture_config:
123  *              Configuration for files to be excluded from capture.
124  *
125  * @add_flags:  Bitwise or of WIMLIB_ADD_IMAGE_FLAG_*
126  *
127  * @extra_arg:  Ignored in UNIX builds; used to pass sd_set pointer in Windows
128  *              builds.
129  *
130  * @return:     0 on success, nonzero on failure.  It is a failure if any of
131  *              the files cannot be `stat'ed, or if any of the needed
132  *              directories cannot be opened or read.  Failure to add the files
133  *              to the WIM may still occur later when trying to actually read
134  *              the on-disk files during a call to wimlib_write() or
135  *              wimlib_overwrite().
136  */
137 static int
138 unix_build_dentry_tree(struct wim_dentry **root_ret,
139                        const mbchar *root_disk_path,
140                        struct wim_lookup_table *lookup_table,
141                        struct wim_security_data *sd,
142                        const struct capture_config *config,
143                        int add_image_flags,
144                        wimlib_progress_func_t progress_func,
145                        void *extra_arg)
146 {
147         struct wim_dentry *root = NULL;
148         int ret = 0;
149         struct wim_inode *inode;
150
151         if (exclude_path(root_disk_path, config, true)) {
152                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) {
153                         ERROR("Cannot exclude the root directory from capture");
154                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
155                         goto out;
156                 }
157                 if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
158                     && progress_func)
159                 {
160                         union wimlib_progress_info info;
161                         info.scan.cur_path = root_disk_path;
162                         info.scan.excluded = true;
163                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
164                 }
165                 goto out;
166         }
167
168         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
169             && progress_func)
170         {
171                 union wimlib_progress_info info;
172                 info.scan.cur_path = root_disk_path;
173                 info.scan.excluded = false;
174                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
175         }
176
177         /* UNIX version of capturing a directory tree */
178         struct stat root_stbuf;
179         int (*stat_fn)(const char *restrict, struct stat *restrict);
180         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE)
181                 stat_fn = stat;
182         else
183                 stat_fn = lstat;
184
185         ret = (*stat_fn)(root_disk_path, &root_stbuf);
186         if (ret != 0) {
187                 ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
188                 goto out;
189         }
190
191         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) &&
192               !S_ISDIR(root_stbuf.st_mode))
193         {
194                 /* Do a dereference-stat in case the root is a symbolic link.
195                  * This case is allowed, provided that the symbolic link points
196                  * to a directory. */
197                 ret = stat(root_disk_path, &root_stbuf);
198                 if (ret != 0) {
199                         ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
200                         ret = WIMLIB_ERR_STAT;
201                         goto out;
202                 }
203                 if (!S_ISDIR(root_stbuf.st_mode)) {
204                         ERROR("`%s' is not a directory", root_disk_path);
205                         ret = WIMLIB_ERR_NOTDIR;
206                         goto out;
207                 }
208         }
209         if (!S_ISREG(root_stbuf.st_mode) && !S_ISDIR(root_stbuf.st_mode)
210             && !S_ISLNK(root_stbuf.st_mode)) {
211                 ERROR("`%s' is not a regular file, directory, or symbolic link.",
212                       root_disk_path);
213                 ret = WIMLIB_ERR_SPECIAL_FILE;
214                 goto out;
215         }
216
217         ret = new_dentry_with_timeless_inode(path_basename(root_disk_path),
218                                              &root);
219         if (ret)
220                 goto out;
221
222         inode = root->d_inode;
223
224 #ifdef HAVE_STAT_NANOSECOND_PRECISION
225         inode->i_creation_time = timespec_to_wim_timestamp(root_stbuf.st_mtim);
226         inode->i_last_write_time = timespec_to_wim_timestamp(root_stbuf.st_mtim);
227         inode->i_last_access_time = timespec_to_wim_timestamp(root_stbuf.st_atim);
228 #else
229         inode->i_creation_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
230         inode->i_last_write_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
231         inode->i_last_access_time = unix_timestamp_to_wim(root_stbuf.st_atime);
232 #endif
233         /* Leave the inode number at 0 for directories. */
234         if (!S_ISDIR(root_stbuf.st_mode)) {
235                 if (sizeof(ino_t) >= 8)
236                         inode->i_ino = (u64)root_stbuf.st_ino;
237                 else
238                         inode->i_ino = (u64)root_stbuf.st_ino |
239                                            ((u64)root_stbuf.st_dev <<
240                                                 ((sizeof(ino_t) * 8) & 63));
241         }
242         inode->i_resolved = 1;
243         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
244                 ret = inode_set_unix_data(inode, root_stbuf.st_uid,
245                                           root_stbuf.st_gid,
246                                           root_stbuf.st_mode,
247                                           lookup_table,
248                                           UNIX_DATA_ALL | UNIX_DATA_CREATE);
249                 if (ret)
250                         goto out;
251         }
252         add_image_flags &= ~(WIMLIB_ADD_IMAGE_FLAG_ROOT | WIMLIB_ADD_IMAGE_FLAG_SOURCE);
253         if (S_ISREG(root_stbuf.st_mode)) { /* Archiving a regular file */
254
255                 struct wim_lookup_table_entry *lte;
256                 u8 hash[SHA1_HASH_SIZE];
257
258                 inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
259
260                 /* Empty files do not have to have a lookup table entry. */
261                 if (root_stbuf.st_size == 0)
262                         goto out;
263
264                 /* For each regular file, we must check to see if the file is in
265                  * the lookup table already; if it is, we increment its refcnt;
266                  * otherwise, we create a new lookup table entry and insert it.
267                  * */
268
269                 ret = sha1sum(root_disk_path, hash);
270                 if (ret != 0)
271                         goto out;
272
273                 lte = __lookup_resource(lookup_table, hash);
274                 if (lte) {
275                         lte->refcnt++;
276                         DEBUG("Add lte reference %u for `%s'", lte->refcnt,
277                               root_disk_path);
278                 } else {
279                         mbchar *file_on_disk = STRDUP(root_disk_path);
280                         if (!file_on_disk) {
281                                 ERROR("Failed to allocate memory for file path");
282                                 ret = WIMLIB_ERR_NOMEM;
283                                 goto out;
284                         }
285                         lte = new_lookup_table_entry();
286                         if (!lte) {
287                                 FREE(file_on_disk);
288                                 ret = WIMLIB_ERR_NOMEM;
289                                 goto out;
290                         }
291                         lte->file_on_disk = file_on_disk;
292                         lte->resource_location = RESOURCE_IN_FILE_ON_DISK;
293                         lte->resource_entry.original_size = root_stbuf.st_size;
294                         lte->resource_entry.size = root_stbuf.st_size;
295                         copy_hash(lte->hash, hash);
296                         lookup_table_insert(lookup_table, lte);
297                 }
298                 root->d_inode->i_lte = lte;
299         } else if (S_ISDIR(root_stbuf.st_mode)) { /* Archiving a directory */
300
301                 inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
302
303                 DIR *dir;
304                 struct dirent entry, *result;
305                 struct wim_dentry *child;
306
307                 dir = opendir(root_disk_path);
308                 if (!dir) {
309                         ERROR_WITH_ERRNO("Failed to open the directory `%s'",
310                                          root_disk_path);
311                         ret = WIMLIB_ERR_OPEN;
312                         goto out;
313                 }
314
315                 /* Buffer for names of files in directory. */
316                 size_t len = strlen(root_disk_path);
317                 mbchar name[len + 1 + FILENAME_MAX + 1];
318                 memcpy(name, root_disk_path, len);
319                 name[len] = '/';
320
321                 /* Create a dentry for each entry in the directory on disk, and recurse
322                  * to any subdirectories. */
323                 while (1) {
324                         errno = 0;
325                         ret = readdir_r(dir, &entry, &result);
326                         if (ret != 0) {
327                                 ret = WIMLIB_ERR_READ;
328                                 ERROR_WITH_ERRNO("Error reading the "
329                                                  "directory `%s'",
330                                                  root_disk_path);
331                                 break;
332                         }
333                         if (result == NULL)
334                                 break;
335                         if (result->d_name[0] == '.' && (result->d_name[1] == '\0'
336                               || (result->d_name[1] == '.' && result->d_name[2] == '\0')))
337                                         continue;
338                         strcpy(name + len + 1, result->d_name);
339                         ret = unix_build_dentry_tree(&child, name,
340                                                      lookup_table,
341                                                      NULL, config,
342                                                      add_image_flags,
343                                                      progress_func, NULL);
344                         if (ret != 0)
345                                 break;
346                         if (child)
347                                 dentry_add_child(root, child);
348                 }
349                 closedir(dir);
350         } else { /* Archiving a symbolic link */
351                 inode->i_attributes = FILE_ATTRIBUTE_REPARSE_POINT;
352                 inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK;
353
354                 /* The idea here is to call readlink() to get the UNIX target of
355                  * the symbolic link, then turn the target into a reparse point
356                  * data buffer that contains a relative or absolute symbolic
357                  * link (NOT a junction point or *full* path symbolic link with
358                  * drive letter).
359                  */
360
361                 mbchar deref_name_buf[4096];
362                 ssize_t deref_name_len;
363
364                 deref_name_len = readlink(root_disk_path, deref_name_buf,
365                                           sizeof(deref_name_buf) - 1);
366                 if (deref_name_len >= 0) {
367                         deref_name_buf[deref_name_len] = '\0';
368                         DEBUG("Read symlink `%s'", deref_name_buf);
369                         ret = inode_set_symlink(root->d_inode, deref_name_buf,
370                                                 lookup_table, NULL);
371                         if (ret == 0) {
372                                 /*
373                                  * Unfortunately, Windows seems to have the
374                                  * concept of "file" symbolic links as being
375                                  * different from "directory" symbolic links...
376                                  * so FILE_ATTRIBUTE_DIRECTORY needs to be set
377                                  * on the symbolic link if the *target* of the
378                                  * symbolic link is a directory.
379                                  */
380                                 struct stat stbuf;
381                                 if (stat(root_disk_path, &stbuf) == 0 &&
382                                     S_ISDIR(stbuf.st_mode))
383                                 {
384                                         inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
385                                 }
386                         }
387                 } else {
388                         ERROR_WITH_ERRNO("Failed to read target of "
389                                          "symbolic link `%s'", root_disk_path);
390                         ret = WIMLIB_ERR_READLINK;
391                 }
392         }
393 out:
394         if (ret == 0)
395                 *root_ret = root;
396         else
397                 free_dentry_tree(root, lookup_table);
398         return ret;
399 }
400 #endif /* !__WIN32__ */
401
/* Identifies which section of the capture configuration file the parser is
 * currently inside, and therefore which pattern list a line is added to. */
enum pattern_type {
        /* No section header has been seen yet. */
        NONE                       = 0,
        /* Inside [ExclusionList]. */
        EXCLUSION_LIST             = 1,
        /* Inside [ExclusionException]. */
        EXCLUSION_EXCEPTION        = 2,
        /* Inside [CompressionExclusionList]. */
        COMPRESSION_EXCLUSION_LIST = 3,
        /* Inside [AlignmentList]. */
        ALIGNMENT_LIST             = 4,
};
409
410 #define COMPAT_DEFAULT_CONFIG
411
412 /* Default capture configuration file when none is specified. */
413 static const mbchar *default_config =
414 #ifdef COMPAT_DEFAULT_CONFIG /* XXX: This policy is being moved to library
415                                 users.  The next ABI-incompatible library
416                                 version will default to the empty string here. */
417 "[ExclusionList]\n"
418 "\\$ntfs.log\n"
419 "\\hiberfil.sys\n"
420 "\\pagefile.sys\n"
421 "\\System Volume Information\n"
422 "\\RECYCLER\n"
423 "\\Windows\\CSC\n"
424 "\n"
425 "[CompressionExclusionList]\n"
426 "*.mp3\n"
427 "*.zip\n"
428 "*.cab\n"
429 "\\WINDOWS\\inf\\*.pnf\n";
430 #else
431 "";
432 #endif
433
434 static void
435 destroy_pattern_list(struct pattern_list *list)
436 {
437         FREE(list->pats);
438 }
439
440 static void
441 destroy_capture_config(struct capture_config *config)
442 {
443         destroy_pattern_list(&config->exclusion_list);
444         destroy_pattern_list(&config->exclusion_exception);
445         destroy_pattern_list(&config->compression_exclusion_list);
446         destroy_pattern_list(&config->alignment_list);
447         FREE(config->config_str);
448         FREE(config->prefix);
449         memset(config, 0, sizeof(*config));
450 }
451
452 static int
453 pattern_list_add_pattern(struct pattern_list *list, const mbchar *pattern)
454 {
455         const char **pats;
456         if (list->num_pats >= list->num_allocated_pats) {
457                 pats = REALLOC(list->pats,
458                                sizeof(list->pats[0]) * (list->num_allocated_pats + 8));
459                 if (!pats)
460                         return WIMLIB_ERR_NOMEM;
461                 list->num_allocated_pats += 8;
462                 list->pats = pats;
463         }
464         list->pats[list->num_pats++] = pattern;
465         return 0;
466 }
467
468 /* Parses the contents of the image capture configuration file and fills in a
469  * `struct capture_config'. */
470 static int
471 init_capture_config(struct capture_config *config,
472                     const mbchar *_config_str, size_t config_len)
473 {
474         mbchar *config_str;
475         mbchar *p;
476         mbchar *eol;
477         mbchar *next_p;
478         size_t bytes_remaining;
479         enum pattern_type type = NONE;
480         int ret;
481         unsigned long line_no = 0;
482
483         DEBUG("config_len = %zu", config_len);
484         bytes_remaining = config_len;
485         memset(config, 0, sizeof(*config));
486         config_str = MALLOC(config_len);
487         if (!config_str) {
488                 ERROR("Could not duplicate capture config string");
489                 return WIMLIB_ERR_NOMEM;
490         }
491
492         memcpy(config_str, _config_str, config_len);
493         next_p = config_str;
494         config->config_str = config_str;
495         while (bytes_remaining) {
496                 line_no++;
497                 p = next_p;
498                 eol = memchr(p, '\n', bytes_remaining);
499                 if (!eol) {
500                         ERROR("Expected end-of-line in capture config file on "
501                               "line %lu", line_no);
502                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
503                         goto out_destroy;
504                 }
505
506                 next_p = eol + 1;
507                 bytes_remaining -= (next_p - p);
508                 if (eol == p)
509                         continue;
510
511                 if (*(eol - 1) == '\r')
512                         eol--;
513                 *eol = '\0';
514
515                 /* Translate backslash to forward slash */
516                 for (mbchar *pp = p; pp != eol; pp++)
517                         if (*pp == '\\')
518                                 *pp = '/';
519
520                 /* Remove drive letter */
521                 if (eol - p > 2 && isalpha(*p) && *(p + 1) == ':')
522                         p += 2;
523
524                 ret = 0;
525                 if (strcmp(p, "[ExclusionList]") == 0)
526                         type = EXCLUSION_LIST;
527                 else if (strcmp(p, "[ExclusionException]") == 0)
528                         type = EXCLUSION_EXCEPTION;
529                 else if (strcmp(p, "[CompressionExclusionList]") == 0)
530                         type = COMPRESSION_EXCLUSION_LIST;
531                 else if (strcmp(p, "[AlignmentList]") == 0)
532                         type = ALIGNMENT_LIST;
533                 else if (p[0] == '[' && strrchr(p, ']')) {
534                         ERROR("Unknown capture configuration section `%s'", p);
535                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
536                 } else switch (type) {
537                 case EXCLUSION_LIST:
538                         DEBUG("Adding pattern \"%s\" to exclusion list", p);
539                         ret = pattern_list_add_pattern(&config->exclusion_list, p);
540                         break;
541                 case EXCLUSION_EXCEPTION:
542                         DEBUG("Adding pattern \"%s\" to exclusion exception list", p);
543                         ret = pattern_list_add_pattern(&config->exclusion_exception, p);
544                         break;
545                 case COMPRESSION_EXCLUSION_LIST:
546                         DEBUG("Adding pattern \"%s\" to compression exclusion list", p);
547                         ret = pattern_list_add_pattern(&config->compression_exclusion_list, p);
548                         break;
549                 case ALIGNMENT_LIST:
550                         DEBUG("Adding pattern \"%s\" to alignment list", p);
551                         ret = pattern_list_add_pattern(&config->alignment_list, p);
552                         break;
553                 default:
554                         ERROR("Line %lu of capture configuration is not "
555                               "in a block (such as [ExclusionList])",
556                               line_no);
557                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
558                         break;
559                 }
560                 if (ret != 0)
561                         goto out_destroy;
562         }
563         return 0;
564 out_destroy:
565         destroy_capture_config(config);
566         return ret;
567 }
568
569 static int capture_config_set_prefix(struct capture_config *config,
570                                      const mbchar *_prefix)
571 {
572         mbchar *prefix = STRDUP(_prefix);
573
574         if (!prefix)
575                 return WIMLIB_ERR_NOMEM;
576         FREE(config->prefix);
577         config->prefix = prefix;
578         config->prefix_len = strlen(prefix);
579         return 0;
580 }
581
582 static bool match_pattern(const mbchar *path,
583                           const mbchar *path_basename,
584                           const struct pattern_list *list)
585 {
586         for (size_t i = 0; i < list->num_pats; i++) {
587                 const char *pat = list->pats[i];
588                 const char *string;
589                 if (pat[0] == '/')
590                         /* Absolute path from root of capture */
591                         string = path;
592                 else {
593                         if (strchr(pat, '/'))
594                                 /* Relative path from root of capture */
595                                 string = path + 1;
596                         else
597                                 /* A file name pattern */
598                                 string = path_basename;
599                 }
600
601                 /* Warning: on Windows native builds, fnmatch() calls the
602                  * replacement function in win32.c. */
603                 if (fnmatch(pat, string, FNM_PATHNAME
604                                 #ifdef FNM_CASEFOLD
605                                         | FNM_CASEFOLD
606                                 #endif
607                             ) == 0)
608                 {
609                         DEBUG("`%s' matches the pattern \"%s\"",
610                               string, pat);
611                         return true;
612                 }
613         }
614         return false;
615 }
616
617 /* Return true if the image capture configuration file indicates we should
618  * exclude the filename @path from capture.
619  *
620  * If @exclude_prefix is %true, the part of the path up and including the name
621  * of the directory being captured is not included in the path for matching
622  * purposes.  This allows, for example, a pattern like /hiberfil.sys to match a
623  * file /mnt/windows7/hiberfil.sys if we are capturing the /mnt/windows7
624  * directory.
625  */
626 bool
627 exclude_path(const mbchar *path, const struct capture_config *config,
628              bool exclude_prefix)
629 {
630         const mbchar *basename = path_basename(path);
631         if (exclude_prefix) {
632                 wimlib_assert(strlen(path) >= config->prefix_len);
633                 if (memcmp(config->prefix, path, config->prefix_len) == 0
634                      && path[config->prefix_len] == '/')
635                         path += config->prefix_len;
636         }
637         return match_pattern(path, basename, &config->exclusion_list) &&
638                 !match_pattern(path, basename, &config->exclusion_exception);
639
640 }
641
/* Strip leading and trailing forward slashes from a string.  Modifies it in
 * place and returns the stripped string.
 *
 * Leading slashes are skipped by returning a pointer further into the buffer;
 * trailing slashes are overwritten with null bytes.  A NULL argument yields the
 * empty string. */
static const char *
canonicalize_target_path(char *target_path)
{
        size_t len;

        if (target_path == NULL)
                return "";

        /* Skip every leading '/'. */
        target_path += strspn(target_path, "/");

        /* Chop trailing '/' characters.  Whatever remains after the skip
         * above is either empty or begins with a non-slash character. */
        len = strlen(target_path);
        while (len > 0 && target_path[len - 1] == '/')
                target_path[--len] = '\0';

        return target_path;
}
664
665 #ifdef __WIN32__
/* Replaces every backslash in the null-terminated string @s with a forward
 * slash, in place. */
static void
zap_backslashes(char *s)
{
        for (char *p = strchr(s, '\\'); p != NULL; p = strchr(p + 1, '\\'))
                *p = '/';
}
675 #endif
676
677 /* Strip leading and trailing slashes from the target paths */
678 static void
679 canonicalize_targets(struct wimlib_capture_source *sources, size_t num_sources)
680 {
681         while (num_sources--) {
682                 DEBUG("Canonicalizing { source: \"%s\", target=\"%s\"}",
683                       sources->fs_source_path,
684                       sources->wim_target_path);
685 #ifdef __WIN32__
686                 /* The Windows API can handle forward slashes.  Just get rid of
687                  * backslashes to avoid confusing other parts of the library
688                  * code. */
689                 zap_backslashes(sources->fs_source_path);
690                 if (sources->wim_target_path)
691                         zap_backslashes(sources->wim_target_path);
692 #endif
693                 sources->wim_target_path =
694                         (char*)canonicalize_target_path(sources->wim_target_path);
695                 DEBUG("Canonical target: \"%s\"", sources->wim_target_path);
696                 sources++;
697         }
698 }
699
700 static int
701 capture_source_cmp(const void *p1, const void *p2)
702 {
703         const struct wimlib_capture_source *s1 = p1, *s2 = p2;
704         return strcmp(s1->wim_target_path, s2->wim_target_path);
705 }
706
707 /* Sorts the capture sources lexicographically by target path.  This occurs
708  * after leading and trailing forward slashes are stripped.
709  *
710  * One purpose of this is to make sure that target paths that are inside other
711  * target paths are added after the containing target paths. */
712 static void
713 sort_sources(struct wimlib_capture_source *sources, size_t num_sources)
714 {
715         qsort(sources, num_sources, sizeof(sources[0]), capture_source_cmp);
716 }
717
718 static int
719 check_sorted_sources(struct wimlib_capture_source *sources, size_t num_sources,
720                      int add_image_flags)
721 {
722         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
723                 if (num_sources != 1) {
724                         ERROR("Must specify exactly 1 capture source "
725                               "(the NTFS volume) in NTFS mode!");
726                         return WIMLIB_ERR_INVALID_PARAM;
727                 }
728                 if (sources[0].wim_target_path[0] != '\0') {
729                         ERROR("In NTFS capture mode the target path inside "
730                               "the image must be the root directory!");
731                         return WIMLIB_ERR_INVALID_PARAM;
732                 }
733         } else if (num_sources != 0) {
734                 /* This code is disabled because the current code
735                  * unconditionally attempts to do overlays.  So, duplicate
736                  * target paths are OK. */
737         #if 0
738                 if (num_sources > 1 && sources[0].wim_target_path[0] == '\0') {
739                         ERROR("Cannot specify root target when using multiple "
740                               "capture sources!");
741                         return WIMLIB_ERR_INVALID_PARAM;
742                 }
743                 for (size_t i = 0; i < num_sources - 1; i++) {
744                         size_t len = strlen(sources[i].wim_target_path);
745                         size_t j = i + 1;
746                         const char *target1 = sources[i].wim_target_path;
747                         do {
748                                 const char *target2 = sources[j].wim_target_path;
749                                 DEBUG("target1=%s, target2=%s",
750                                       target1,target2);
751                                 if (strncmp(target1, target2, len) ||
752                                     target2[len] > '/')
753                                         break;
754                                 if (target2[len] == '/') {
755                                         ERROR("Invalid target `%s': is a prefix of `%s'",
756                                               target1, target2);
757                                         return WIMLIB_ERR_INVALID_PARAM;
758                                 }
759                                 if (target2[len] == '\0') {
760                                         ERROR("Invalid target `%s': is a duplicate of `%s'",
761                                               target1, target2);
762                                         return WIMLIB_ERR_INVALID_PARAM;
763                                 }
764                         } while (++j != num_sources);
765                 }
766         #endif
767         }
768         return 0;
769
770 }
771
772 /* Creates a new directory to place in the WIM image.  This is to create parent
773  * directories that are not part of any target as needed.  */
774 static int
775 new_filler_directory(const mbchar *name, struct wim_dentry **dentry_ret)
776 {
777         int ret;
778         struct wim_dentry *dentry;
779
780         DEBUG("Creating filler directory \"%s\"", name);
781         ret = new_dentry_with_inode(name, &dentry);
782         if (ret == 0) {
783                 /* Leave the inode number as 0 for now.  The final inode number
784                  * will be assigned later by assign_inode_numbers(). */
785                 dentry->d_inode->i_resolved = 1;
786                 dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
787                 *dentry_ret = dentry;
788         }
789         return ret;
790 }
791
792 /* Transfers the children of @branch to @target.  It is an error if @target is
793  * not a directory or if both @branch and @target contain a child dentry with
794  * the same name. */
795 static int
796 do_overlay(struct wim_dentry *target, struct wim_dentry *branch)
797 {
798         struct rb_root *rb_root;
799
800         DEBUG("Doing overlay \"%W\" => \"%W\"",
801               branch->file_name, target->file_name);
802
803         if (!dentry_is_directory(target)) {
804                 ERROR("Cannot overlay directory \"%W\" over non-directory",
805                       branch->file_name);
806                 return WIMLIB_ERR_INVALID_OVERLAY;
807         }
808
809         rb_root = &branch->d_inode->i_children;
810         while (rb_root->rb_node) { /* While @branch has children... */
811                 struct wim_dentry *child = rbnode_dentry(rb_root->rb_node);
812                 /* Move @child to the directory @target */
813                 unlink_dentry(child);
814                 if (!dentry_add_child(target, child)) {
815                         /* Revert the change to avoid leaking the directory tree
816                          * rooted at @child */
817                         dentry_add_child(branch, child);
818                         ERROR("Overlay error: file \"%W\" already exists "
819                               "as a child of \"%W\"",
820                               child->file_name, target->file_name);
821                         return WIMLIB_ERR_INVALID_OVERLAY;
822                 }
823         }
824         free_dentry(branch);
825         return 0;
826
827 }
828
829 /* Attach or overlay a branch onto the WIM image.
830  *
831  * @root_p:
832  *      Pointer to the root of the WIM image, or pointer to NULL if it has not
833  *      been created yet.
834  * @branch
835  *      Branch to add.
836  * @target_path:
837  *      Path in the WIM image to add the branch, with leading and trailing
838  *      slashes stripped.
839  */
840 static int
841 attach_branch(struct wim_dentry **root_p, struct wim_dentry *branch,
842               mbchar *target_path)
843 {
844         char *slash;
845         struct wim_dentry *dentry, *parent, *target;
846         int ret;
847
848         DEBUG("Attaching branch \"%W\" => \"%s\"",
849               branch->file_name, target_path);
850
851         if (*target_path == '\0') {
852                 /* Target: root directory */
853                 if (*root_p) {
854                         /* Overlay on existing root */
855                         return do_overlay(*root_p, branch);
856                 } else  {
857                         /* Set as root */
858                         *root_p = branch;
859                         return 0;
860                 }
861         }
862
863         /* Adding a non-root branch.  Create root if it hasn't been created
864          * already. */
865         if (!*root_p) {
866                 ret  = new_filler_directory("", root_p);
867                 if (ret)
868                         return ret;
869         }
870
871         /* Walk the path to the branch, creating filler directories as needed.
872          * */
873         parent = *root_p;
874         while ((slash = strchr(target_path, '/'))) {
875                 *slash = '\0';
876                 dentry = get_dentry_child_with_name(parent, target_path);
877                 if (!dentry) {
878                         ret = new_filler_directory(target_path, &dentry);
879                         if (ret)
880                                 return ret;
881                         dentry_add_child(parent, dentry);
882                 }
883                 parent = dentry;
884                 target_path = slash;
885                 /* Skip over slashes.  Note: this cannot overrun the length of
886                  * the string because the last character cannot be a slash, as
887                  * trailing slashes were tripped.  */
888                 do {
889                         ++target_path;
890                 } while (*target_path == '/');
891         }
892
893         /* If the target path already existed, overlay the branch onto it.
894          * Otherwise, set the branch as the target path. */
895         target = get_dentry_child_with_utf16le_name(parent, branch->file_name,
896                                                     branch->file_name_nbytes);
897         if (target) {
898                 return do_overlay(target, branch);
899         } else {
900                 dentry_add_child(parent, branch);
901                 return 0;
902         }
903 }
904
905 WIMLIBAPI int
906 wimlib_add_image_multisource(WIMStruct *w,
907                              struct wimlib_capture_source *sources,
908                              size_t num_sources,
909                              const utf8char *name,
910                              const mbchar *config_str,
911                              size_t config_len,
912                              int add_image_flags,
913                              wimlib_progress_func_t progress_func)
914 {
915         int (*capture_tree)(struct wim_dentry **,
916                             const mbchar *,
917                             struct wim_lookup_table *,
918                             struct wim_security_data *,
919                             const struct capture_config *,
920                             int,
921                             wimlib_progress_func_t,
922                             void *);
923         void *extra_arg;
924         struct wim_dentry *root_dentry;
925         struct wim_dentry *branch;
926         struct wim_security_data *sd;
927         struct capture_config config;
928         struct wim_image_metadata *imd;
929         int ret;
930
931         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
932 #ifdef WITH_NTFS_3G
933                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
934                         ERROR("Cannot dereference files when capturing directly from NTFS");
935                         return WIMLIB_ERR_INVALID_PARAM;
936                 }
937                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
938                         ERROR("Capturing UNIX owner and mode not supported "
939                               "when capturing directly from NTFS");
940                         return WIMLIB_ERR_INVALID_PARAM;
941                 }
942                 capture_tree = build_dentry_tree_ntfs;
943                 extra_arg = &w->ntfs_vol;
944 #else
945                 ERROR("wimlib was compiled without support for NTFS-3g, so\n"
946                       "        cannot capture a WIM image directly from a NTFS volume!");
947                 return WIMLIB_ERR_UNSUPPORTED;
948 #endif
949         } else {
950         #ifdef __WIN32__
951                 capture_tree = win32_build_dentry_tree;
952         #else
953                 capture_tree = unix_build_dentry_tree;
954         #endif
955                 extra_arg = NULL;
956         }
957
958 #ifdef __WIN32__
959         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
960                 ERROR("Capturing UNIX-specific data is not supported on Windows");
961                 return WIMLIB_ERR_INVALID_PARAM;
962         }
963         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
964                 ERROR("Dereferencing symbolic links is not supported on Windows");
965                 return WIMLIB_ERR_INVALID_PARAM;
966         }
967 #endif
968
969         if (!name || !*name) {
970                 ERROR("Must specify a non-empty string for the image name");
971                 return WIMLIB_ERR_INVALID_PARAM;
972         }
973
974         if (w->hdr.total_parts != 1) {
975                 ERROR("Cannot add an image to a split WIM");
976                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
977         }
978
979         if (wimlib_image_name_in_use(w, name)) {
980                 ERROR("There is already an image named \"%U\" in the WIM!",
981                       name);
982                 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
983         }
984
985         if (!config_str) {
986                 DEBUG("Using default capture configuration");
987                 config_str = default_config;
988                 config_len = strlen(default_config);
989         }
990         ret = init_capture_config(&config, config_str, config_len);
991         if (ret)
992                 goto out;
993
994         DEBUG("Allocating security data");
995         sd = CALLOC(1, sizeof(struct wim_security_data));
996         if (!sd) {
997                 ret = WIMLIB_ERR_NOMEM;
998                 goto out_destroy_capture_config;
999         }
1000         sd->total_length = 8;
1001         sd->refcnt = 1;
1002
1003         DEBUG("Using %zu capture sources", num_sources);
1004         canonicalize_targets(sources, num_sources);
1005         sort_sources(sources, num_sources);
1006         ret = check_sorted_sources(sources, num_sources, add_image_flags);
1007         if (ret) {
1008                 ret = WIMLIB_ERR_INVALID_PARAM;
1009                 goto out_free_security_data;
1010         }
1011
1012         DEBUG("Building dentry tree.");
1013         root_dentry = NULL;
1014
1015         for (size_t i = 0; i < num_sources; i++) {
1016                 int flags;
1017                 union wimlib_progress_info progress;
1018
1019                 DEBUG("Building dentry tree for source %zu of %zu "
1020                       "(\"%s\" => \"%s\")", i + 1, num_sources,
1021                       sources[i].fs_source_path,
1022                       sources[i].wim_target_path);
1023                 if (progress_func) {
1024                         memset(&progress, 0, sizeof(progress));
1025                         progress.scan.source = sources[i].fs_source_path;
1026                         progress.scan.wim_target_path = sources[i].wim_target_path;
1027                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_BEGIN, &progress);
1028                 }
1029                 ret = capture_config_set_prefix(&config,
1030                                                 sources[i].fs_source_path);
1031                 if (ret)
1032                         goto out_free_dentry_tree;
1033                 flags = add_image_flags | WIMLIB_ADD_IMAGE_FLAG_SOURCE;
1034                 if (!*sources[i].wim_target_path)
1035                         flags |= WIMLIB_ADD_IMAGE_FLAG_ROOT;
1036                 ret = (*capture_tree)(&branch, sources[i].fs_source_path,
1037                                       w->lookup_table, sd,
1038                                       &config,
1039                                       flags,
1040                                       progress_func, extra_arg);
1041                 if (ret) {
1042                         ERROR("Failed to build dentry tree for `%s'",
1043                               sources[i].fs_source_path);
1044                         goto out_free_dentry_tree;
1045                 }
1046                 if (branch) {
1047                         /* Use the target name, not the source name, for
1048                          * the root of each branch from a capture
1049                          * source.  (This will also set the root dentry
1050                          * of the entire image to be unnamed.) */
1051                         ret = set_dentry_name(branch,
1052                                               path_basename(sources[i].wim_target_path));
1053                         if (ret)
1054                                 goto out_free_branch;
1055
1056                         ret = attach_branch(&root_dentry, branch,
1057                                             sources[i].wim_target_path);
1058                         if (ret)
1059                                 goto out_free_branch;
1060                 }
1061                 if (progress_func)
1062                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_END, &progress);
1063         }
1064
1065         if (root_dentry == NULL) {
1066                 ret = new_filler_directory("" , &root_dentry);
1067                 if (ret)
1068                         goto out_free_dentry_tree;
1069         }
1070
1071         DEBUG("Calculating full paths of dentries.");
1072         ret = for_dentry_in_tree(root_dentry, calculate_dentry_full_path, NULL);
1073         if (ret)
1074                 goto out_free_dentry_tree;
1075
1076         ret = add_new_dentry_tree(w, root_dentry, sd);
1077         if (ret)
1078                 goto out_free_dentry_tree;
1079
1080         imd = &w->image_metadata[w->hdr.image_count - 1];
1081
1082         ret = dentry_tree_fix_inodes(root_dentry, &imd->inode_list);
1083         if (ret)
1084                 goto out_destroy_imd;
1085
1086         DEBUG("Assigning hard link group IDs");
1087         assign_inode_numbers(&imd->inode_list);
1088
1089         ret = xml_add_image(w, name);
1090         if (ret)
1091                 goto out_destroy_imd;
1092
1093         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_BOOT)
1094                 wimlib_set_boot_idx(w, w->hdr.image_count);
1095         ret = 0;
1096         goto out_destroy_capture_config;
1097 out_destroy_imd:
1098         destroy_image_metadata(&w->image_metadata[w->hdr.image_count - 1],
1099                                w->lookup_table);
1100         w->hdr.image_count--;
1101         goto out;
1102 out_free_branch:
1103         free_dentry_tree(branch, w->lookup_table);
1104 out_free_dentry_tree:
1105         free_dentry_tree(root_dentry, w->lookup_table);
1106 out_free_security_data:
1107         free_security_data(sd);
1108 out_destroy_capture_config:
1109         destroy_capture_config(&config);
1110 out:
1111         return ret;
1112 }
1113
1114 WIMLIBAPI int
1115 wimlib_add_image(WIMStruct *w,
1116                  const mbchar *source,
1117                  const utf8char *name,
1118                  const mbchar *config_str,
1119                  size_t config_len,
1120                  int add_image_flags,
1121                  wimlib_progress_func_t progress_func)
1122 {
1123         if (!source || !*source)
1124                 return WIMLIB_ERR_INVALID_PARAM;
1125
1126         char *fs_source_path = STRDUP(source);
1127         int ret;
1128         struct wimlib_capture_source capture_src = {
1129                 .fs_source_path = fs_source_path,
1130                 .wim_target_path = NULL,
1131                 .reserved = 0,
1132         };
1133         ret = wimlib_add_image_multisource(w, &capture_src, 1, name,
1134                                            config_str, config_len,
1135                                            add_image_flags, progress_func);
1136         FREE(fs_source_path);
1137         return ret;
1138 }