fc2f6a200ca314e486942d66378bdd8ae1665833
[wimlib] / src / add_image.c
1 /*
2  * add_image.c
3  */
4
5 /*
6  * Copyright (C) 2012, 2013 Eric Biggers
7  *
8  * This file is part of wimlib, a library for working with WIM files.
9  *
10  * wimlib is free software; you can redistribute it and/or modify it under the
11  * terms of the GNU General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option)
13  * any later version.
14  *
15  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17  * A PARTICULAR PURPOSE. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with wimlib; if not, see http://www.gnu.org/licenses/.
22  */
23
24 #include "wimlib_internal.h"
25 #include "dentry.h"
26 #include "timestamp.h"
27 #include "lookup_table.h"
28 #include "xml.h"
29 #include <string.h>
30 #include <fnmatch.h>
31 #include <stdlib.h>
32 #include <ctype.h>
33 #include <sys/stat.h>
34 #include <dirent.h>
35 #include <errno.h>
36 #include <unistd.h>
37
/* Flags defined locally in this file, occupying the two highest bits of the
 * add_image_flags word.  build_dentry_tree() clears both of them before it
 * recurses into the children of a directory.
 * NOTE(review): presumably the high bits were chosen to stay clear of the
 * public WIMLIB_ADD_IMAGE_FLAG_* values -- confirm against wimlib.h. */

/* When set, the path being scanned is treated as the root of the capture:
 * build_dentry_tree() refuses to exclude it and requires it to be a directory
 * (or a symbolic link to one). */
#define WIMLIB_ADD_IMAGE_FLAG_ROOT      0x80000000
/* Cleared together with the ROOT flag before recursing; the code that sets and
 * tests this flag is not in this part of the file. */
#define WIMLIB_ADD_IMAGE_FLAG_SOURCE    0x40000000
40
41 /*
42  * Adds the dentry tree and security data for a new image to the image metadata
43  * array of the WIMStruct.
44  */
45 int add_new_dentry_tree(WIMStruct *w, struct wim_dentry *root_dentry,
46                         struct wim_security_data *sd)
47 {
48         struct wim_lookup_table_entry *metadata_lte;
49         struct wim_image_metadata *imd;
50         struct wim_image_metadata *new_imd;
51
52         wimlib_assert(root_dentry != NULL);
53
54         DEBUG("Reallocating image metadata array for image_count = %u",
55               w->hdr.image_count + 1);
56         imd = CALLOC((w->hdr.image_count + 1), sizeof(struct wim_image_metadata));
57
58         if (!imd) {
59                 ERROR("Failed to allocate memory for new image metadata array");
60                 goto err;
61         }
62
63         memcpy(imd, w->image_metadata,
64                w->hdr.image_count * sizeof(struct wim_image_metadata));
65
66         metadata_lte = new_lookup_table_entry();
67         if (!metadata_lte)
68                 goto err_free_imd;
69
70         metadata_lte->resource_entry.flags = WIM_RESHDR_FLAG_METADATA;
71         random_hash(metadata_lte->hash);
72         lookup_table_insert(w->lookup_table, metadata_lte);
73
74         new_imd = &imd[w->hdr.image_count];
75
76         new_imd->root_dentry    = root_dentry;
77         new_imd->metadata_lte   = metadata_lte;
78         new_imd->security_data  = sd;
79         new_imd->modified       = 1;
80
81         FREE(w->image_metadata);
82         w->image_metadata = imd;
83         w->hdr.image_count++;
84         return 0;
85 err_free_imd:
86         FREE(imd);
87 err:
88         return WIMLIB_ERR_NOMEM;
89
90 }
91
92
93 /*
94  * build_dentry_tree():
95  *      Recursively builds a tree of WIM dentries from an on-disk directory
96  *      tree.
97  *
98  * @root_ret:   Place to return a pointer to the root of the dentry tree.  Only
99  *              modified if successful.  Set to NULL if the file or directory was
100  *              excluded from capture.
101  *
102  * @root_disk_path:  The path to the root of the directory tree on disk.
103  *
104  * @lookup_table: The lookup table for the WIM file.  For each file added to the
105  *              dentry tree being built, an entry is added to the lookup table,
106  *              unless an identical stream is already in the lookup table.
107  *              These lookup table entries that are added point to the path of
108  *              the file on disk.
109  *
110  * @sd:         Ignored.  (Security data only captured in NTFS mode.)
111  *
112  * @capture_config:
113  *              Configuration for files to be excluded from capture.
114  *
115  * @add_flags:  Bitwise or of WIMLIB_ADD_IMAGE_FLAG_*
116  *
117  * @extra_arg:  Ignored. (Only used in NTFS mode.)
118  *
119  * @return:     0 on success, nonzero on failure.  It is a failure if any of
120  *              the files cannot be `stat'ed, or if any of the needed
121  *              directories cannot be opened or read.  Failure to add the files
122  *              to the WIM may still occur later when trying to actually read
123  *              the on-disk files during a call to wimlib_write() or
124  *              wimlib_overwrite().
125  */
126 static int build_dentry_tree(struct wim_dentry **root_ret,
127                              const char *root_disk_path,
128                              struct wim_lookup_table *lookup_table,
129                              struct wim_security_data *sd,
130                              const struct capture_config *config,
131                              int add_image_flags,
132                              wimlib_progress_func_t progress_func,
133                              void *extra_arg)
134 {
135         struct stat root_stbuf;
136         int ret = 0;
137         int (*stat_fn)(const char *restrict, struct stat *restrict);
138         struct wim_dentry *root;
139         struct wim_inode *inode;
140
141         if (exclude_path(root_disk_path, config, true)) {
142                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) {
143                         ERROR("Cannot exclude the root directory from capture");
144                         return WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
145                 }
146                 if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
147                     && progress_func)
148                 {
149                         union wimlib_progress_info info;
150                         info.scan.cur_path = root_disk_path;
151                         info.scan.excluded = true;
152                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
153                 }
154                 *root_ret = NULL;
155                 return 0;
156         }
157
158         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
159             && progress_func)
160         {
161                 union wimlib_progress_info info;
162                 info.scan.cur_path = root_disk_path;
163                 info.scan.excluded = false;
164                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
165         }
166
167         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE)
168                 stat_fn = stat;
169         else
170                 stat_fn = lstat;
171
172         ret = (*stat_fn)(root_disk_path, &root_stbuf);
173         if (ret != 0) {
174                 ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
175                 return WIMLIB_ERR_STAT;
176         }
177
178         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) &&
179               !S_ISDIR(root_stbuf.st_mode))
180         {
181                 /* Do a dereference-stat in case the root is a symbolic link.
182                  * This case is allowed, provided that the symbolic link points
183                  * to a directory. */
184                 ret = stat(root_disk_path, &root_stbuf);
185                 if (ret != 0) {
186                         ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
187                         return WIMLIB_ERR_STAT;
188                 }
189                 if (!S_ISDIR(root_stbuf.st_mode)) {
190                         ERROR("`%s' is not a directory", root_disk_path);
191                         return WIMLIB_ERR_NOTDIR;
192                 }
193         }
194         if (!S_ISREG(root_stbuf.st_mode) && !S_ISDIR(root_stbuf.st_mode)
195             && !S_ISLNK(root_stbuf.st_mode)) {
196                 ERROR("`%s' is not a regular file, directory, or symbolic link.",
197                       root_disk_path);
198                 return WIMLIB_ERR_SPECIAL_FILE;
199         }
200
201         root = new_dentry_with_timeless_inode(path_basename(root_disk_path));
202         if (!root) {
203                 if (errno == EILSEQ)
204                         return WIMLIB_ERR_INVALID_UTF8_STRING;
205                 else if (errno == ENOMEM)
206                         return WIMLIB_ERR_NOMEM;
207                 else
208                         return WIMLIB_ERR_ICONV_NOT_AVAILABLE;
209         }
210
211         inode = root->d_inode;
212
213 #ifdef HAVE_STAT_NANOSECOND_PRECISION
214         inode->i_creation_time = timespec_to_wim_timestamp(&root_stbuf.st_mtim);
215         inode->i_last_write_time = timespec_to_wim_timestamp(&root_stbuf.st_mtim);
216         inode->i_last_access_time = timespec_to_wim_timestamp(&root_stbuf.st_atim);
217 #else
218         inode->i_creation_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
219         inode->i_last_write_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
220         inode->i_last_access_time = unix_timestamp_to_wim(root_stbuf.st_atime);
221 #endif
222         if (sizeof(ino_t) >= 8)
223                 inode->i_ino = (u64)root_stbuf.st_ino;
224         else
225                 inode->i_ino = (u64)root_stbuf.st_ino |
226                                    ((u64)root_stbuf.st_dev << ((sizeof(ino_t) * 8) & 63));
227         inode->i_resolved = 1;
228         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
229                 ret = inode_set_unix_data(inode, root_stbuf.st_uid,
230                                           root_stbuf.st_gid,
231                                           root_stbuf.st_mode,
232                                           lookup_table,
233                                           UNIX_DATA_ALL | UNIX_DATA_CREATE);
234                 if (ret)
235                         goto out;
236         }
237         add_image_flags &= ~(WIMLIB_ADD_IMAGE_FLAG_ROOT | WIMLIB_ADD_IMAGE_FLAG_SOURCE);
238         if (S_ISREG(root_stbuf.st_mode)) { /* Archiving a regular file */
239
240                 struct wim_lookup_table_entry *lte;
241                 u8 hash[SHA1_HASH_SIZE];
242
243                 inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
244
245                 /* Empty files do not have to have a lookup table entry. */
246                 if (root_stbuf.st_size == 0)
247                         goto out;
248
249                 /* For each regular file, we must check to see if the file is in
250                  * the lookup table already; if it is, we increment its refcnt;
251                  * otherwise, we create a new lookup table entry and insert it.
252                  * */
253
254                 ret = sha1sum(root_disk_path, hash);
255                 if (ret != 0)
256                         goto out;
257
258                 lte = __lookup_resource(lookup_table, hash);
259                 if (lte) {
260                         lte->refcnt++;
261                         DEBUG("Add lte reference %u for `%s'", lte->refcnt,
262                               root_disk_path);
263                 } else {
264                         char *file_on_disk = STRDUP(root_disk_path);
265                         if (!file_on_disk) {
266                                 ERROR("Failed to allocate memory for file path");
267                                 ret = WIMLIB_ERR_NOMEM;
268                                 goto out;
269                         }
270                         lte = new_lookup_table_entry();
271                         if (!lte) {
272                                 FREE(file_on_disk);
273                                 ret = WIMLIB_ERR_NOMEM;
274                                 goto out;
275                         }
276                         lte->file_on_disk = file_on_disk;
277                         lte->resource_location = RESOURCE_IN_FILE_ON_DISK;
278                         lte->resource_entry.original_size = root_stbuf.st_size;
279                         lte->resource_entry.size = root_stbuf.st_size;
280                         copy_hash(lte->hash, hash);
281                         lookup_table_insert(lookup_table, lte);
282                 }
283                 root->d_inode->i_lte = lte;
284         } else if (S_ISDIR(root_stbuf.st_mode)) { /* Archiving a directory */
285
286                 inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
287
288                 DIR *dir;
289                 struct dirent entry, *result;
290                 struct wim_dentry *child;
291
292                 dir = opendir(root_disk_path);
293                 if (!dir) {
294                         ERROR_WITH_ERRNO("Failed to open the directory `%s'",
295                                          root_disk_path);
296                         ret = WIMLIB_ERR_OPEN;
297                         goto out;
298                 }
299
300                 /* Buffer for names of files in directory. */
301                 size_t len = strlen(root_disk_path);
302                 char name[len + 1 + FILENAME_MAX + 1];
303                 memcpy(name, root_disk_path, len);
304                 name[len] = '/';
305
306                 /* Create a dentry for each entry in the directory on disk, and recurse
307                  * to any subdirectories. */
308                 while (1) {
309                         errno = 0;
310                         ret = readdir_r(dir, &entry, &result);
311                         if (ret != 0) {
312                                 ret = WIMLIB_ERR_READ;
313                                 ERROR_WITH_ERRNO("Error reading the "
314                                                  "directory `%s'",
315                                                  root_disk_path);
316                                 break;
317                         }
318                         if (result == NULL)
319                                 break;
320                         if (result->d_name[0] == '.' && (result->d_name[1] == '\0'
321                               || (result->d_name[1] == '.' && result->d_name[2] == '\0')))
322                                         continue;
323                         strcpy(name + len + 1, result->d_name);
324                         ret = build_dentry_tree(&child, name, lookup_table,
325                                                 NULL, config, add_image_flags,
326                                                 progress_func, NULL);
327                         if (ret != 0)
328                                 break;
329                         if (child)
330                                 dentry_add_child(root, child);
331                 }
332                 closedir(dir);
333         } else { /* Archiving a symbolic link */
334                 inode->i_attributes = FILE_ATTRIBUTE_REPARSE_POINT;
335                 inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK;
336
337                 /* The idea here is to call readlink() to get the UNIX target of
338                  * the symbolic link, then turn the target into a reparse point
339                  * data buffer that contains a relative or absolute symbolic
340                  * link (NOT a junction point or *full* path symbolic link with
341                  * drive letter).
342                  */
343
344                 char deref_name_buf[4096];
345                 ssize_t deref_name_len;
346
347                 deref_name_len = readlink(root_disk_path, deref_name_buf,
348                                           sizeof(deref_name_buf) - 1);
349                 if (deref_name_len >= 0) {
350                         deref_name_buf[deref_name_len] = '\0';
351                         DEBUG("Read symlink `%s'", deref_name_buf);
352                         ret = inode_set_symlink(root->d_inode, deref_name_buf,
353                                                 lookup_table, NULL);
354                         if (ret == 0) {
355                                 /*
356                                  * Unfortunately, Windows seems to have the
357                                  * concept of "file" symbolic links as being
358                                  * different from "directory" symbolic links...
359                                  * so FILE_ATTRIBUTE_DIRECTORY needs to be set
360                                  * on the symbolic link if the *target* of the
361                                  * symbolic link is a directory.
362                                  */
363                                 struct stat stbuf;
364                                 if (stat(root_disk_path, &stbuf) == 0 &&
365                                     S_ISDIR(stbuf.st_mode))
366                                 {
367                                         inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
368                                 }
369                         }
370                 } else {
371                         ERROR_WITH_ERRNO("Failed to read target of "
372                                          "symbolic link `%s'", root_disk_path);
373                         ret = WIMLIB_ERR_READLINK;
374                 }
375         }
376 out:
377         if (ret == 0)
378                 *root_ret = root;
379         else
380                 free_dentry_tree(root, lookup_table);
381         return ret;
382 }
383
384
/* Identifies the section of the capture configuration file that the parser is
 * currently inside, and therefore the list to which a pattern line belongs. */
enum pattern_type {
        /* No section header has been seen yet. */
        NONE                       = 0,
        /* Inside [ExclusionList]. */
        EXCLUSION_LIST             = 1,
        /* Inside [ExclusionException]. */
        EXCLUSION_EXCEPTION        = 2,
        /* Inside [CompressionExclusionList]. */
        COMPRESSION_EXCLUSION_LIST = 3,
        /* Inside [AlignmentList]. */
        ALIGNMENT_LIST             = 4,
};
392
/* Selects the historical built-in capture configuration below.
 *
 * XXX: This policy is being moved to library users.  The next
 * ABI-incompatible library version will default to the empty string here. */
#define COMPAT_DEFAULT_CONFIG

/* Default capture configuration file when none is specified. */
#ifdef COMPAT_DEFAULT_CONFIG
static const char *default_config =
        "[ExclusionList]\n"
        "\\$ntfs.log\n"
        "\\hiberfil.sys\n"
        "\\pagefile.sys\n"
        "\\System Volume Information\n"
        "\\RECYCLER\n"
        "\\Windows\\CSC\n"
        "\n"
        "[CompressionExclusionList]\n"
        "*.mp3\n"
        "*.zip\n"
        "*.cab\n"
        "\\WINDOWS\\inf\\*.pnf\n";
#else
static const char *default_config = "";
#endif
416
417 static void destroy_pattern_list(struct pattern_list *list)
418 {
419         FREE(list->pats);
420 }
421
422 static void destroy_capture_config(struct capture_config *config)
423 {
424         destroy_pattern_list(&config->exclusion_list);
425         destroy_pattern_list(&config->exclusion_exception);
426         destroy_pattern_list(&config->compression_exclusion_list);
427         destroy_pattern_list(&config->alignment_list);
428         FREE(config->config_str);
429         FREE(config->prefix);
430         memset(config, 0, sizeof(*config));
431 }
432
433 static int pattern_list_add_pattern(struct pattern_list *list,
434                                     const char *pattern)
435 {
436         const char **pats;
437         if (list->num_pats >= list->num_allocated_pats) {
438                 pats = REALLOC(list->pats,
439                                sizeof(list->pats[0]) * (list->num_allocated_pats + 8));
440                 if (!pats)
441                         return WIMLIB_ERR_NOMEM;
442                 list->num_allocated_pats += 8;
443                 list->pats = pats;
444         }
445         list->pats[list->num_pats++] = pattern;
446         return 0;
447 }
448
449 /* Parses the contents of the image capture configuration file and fills in a
450  * `struct capture_config'. */
451 static int init_capture_config(struct capture_config *config,
452                                const char *_config_str, size_t config_len)
453 {
454         char *config_str;
455         char *p;
456         char *eol;
457         char *next_p;
458         size_t bytes_remaining;
459         enum pattern_type type = NONE;
460         int ret;
461         unsigned long line_no = 0;
462
463         DEBUG("config_len = %zu", config_len);
464         bytes_remaining = config_len;
465         memset(config, 0, sizeof(*config));
466         config_str = MALLOC(config_len);
467         if (!config_str) {
468                 ERROR("Could not duplicate capture config string");
469                 return WIMLIB_ERR_NOMEM;
470         }
471
472         memcpy(config_str, _config_str, config_len);
473         next_p = config_str;
474         config->config_str = config_str;
475         while (bytes_remaining) {
476                 line_no++;
477                 p = next_p;
478                 eol = memchr(p, '\n', bytes_remaining);
479                 if (!eol) {
480                         ERROR("Expected end-of-line in capture config file on "
481                               "line %lu", line_no);
482                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
483                         goto out_destroy;
484                 }
485
486                 next_p = eol + 1;
487                 bytes_remaining -= (next_p - p);
488                 if (eol == p)
489                         continue;
490
491                 if (*(eol - 1) == '\r')
492                         eol--;
493                 *eol = '\0';
494
495                 /* Translate backslash to forward slash */
496                 for (char *pp = p; pp != eol; pp++)
497                         if (*pp == '\\')
498                                 *pp = '/';
499
500                 /* Remove drive letter */
501                 if (eol - p > 2 && isalpha(*p) && *(p + 1) == ':')
502                         p += 2;
503
504                 ret = 0;
505                 if (strcmp(p, "[ExclusionList]") == 0)
506                         type = EXCLUSION_LIST;
507                 else if (strcmp(p, "[ExclusionException]") == 0)
508                         type = EXCLUSION_EXCEPTION;
509                 else if (strcmp(p, "[CompressionExclusionList]") == 0)
510                         type = COMPRESSION_EXCLUSION_LIST;
511                 else if (strcmp(p, "[AlignmentList]") == 0)
512                         type = ALIGNMENT_LIST;
513                 else if (p[0] == '[' && strrchr(p, ']')) {
514                         ERROR("Unknown capture configuration section `%s'", p);
515                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
516                 } else switch (type) {
517                 case EXCLUSION_LIST:
518                         DEBUG("Adding pattern \"%s\" to exclusion list", p);
519                         ret = pattern_list_add_pattern(&config->exclusion_list, p);
520                         break;
521                 case EXCLUSION_EXCEPTION:
522                         DEBUG("Adding pattern \"%s\" to exclusion exception list", p);
523                         ret = pattern_list_add_pattern(&config->exclusion_exception, p);
524                         break;
525                 case COMPRESSION_EXCLUSION_LIST:
526                         DEBUG("Adding pattern \"%s\" to compression exclusion list", p);
527                         ret = pattern_list_add_pattern(&config->compression_exclusion_list, p);
528                         break;
529                 case ALIGNMENT_LIST:
530                         DEBUG("Adding pattern \"%s\" to alignment list", p);
531                         ret = pattern_list_add_pattern(&config->alignment_list, p);
532                         break;
533                 default:
534                         ERROR("Line %lu of capture configuration is not "
535                               "in a block (such as [ExclusionList])",
536                               line_no);
537                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
538                         break;
539                 }
540                 if (ret != 0)
541                         goto out_destroy;
542         }
543         return 0;
544 out_destroy:
545         destroy_capture_config(config);
546         return ret;
547 }
548
549 static int capture_config_set_prefix(struct capture_config *config,
550                                      const char *_prefix)
551 {
552         char *prefix = STRDUP(_prefix);
553
554         if (!prefix)
555                 return WIMLIB_ERR_NOMEM;
556         FREE(config->prefix);
557         config->prefix = prefix;
558         config->prefix_len = strlen(prefix);
559         return 0;
560 }
561
562 static bool match_pattern(const char *path, const char *path_basename,
563                           const struct pattern_list *list)
564 {
565         for (size_t i = 0; i < list->num_pats; i++) {
566                 const char *pat = list->pats[i];
567                 const char *string;
568                 if (pat[0] == '/')
569                         /* Absolute path from root of capture */
570                         string = path;
571                 else {
572                         if (strchr(pat, '/'))
573                                 /* Relative path from root of capture */
574                                 string = path + 1;
575                         else
576                                 /* A file name pattern */
577                                 string = path_basename;
578                 }
579                 if (fnmatch(pat, string, FNM_PATHNAME
580                         #ifdef FNM_CASEFOLD
581                                         | FNM_CASEFOLD
582                         #endif
583                         ) == 0)
584                 {
585                         DEBUG("`%s' matches the pattern \"%s\"",
586                               string, pat);
587                         return true;
588                 }
589         }
590         return false;
591 }
592
593 /* Return true if the image capture configuration file indicates we should
594  * exclude the filename @path from capture.
595  *
596  * If @exclude_prefix is %true, the part of the path up and including the name
597  * of the directory being captured is not included in the path for matching
598  * purposes.  This allows, for example, a pattern like /hiberfil.sys to match a
599  * file /mnt/windows7/hiberfil.sys if we are capturing the /mnt/windows7
600  * directory.
601  */
602 bool exclude_path(const char *path, const struct capture_config *config,
603                   bool exclude_prefix)
604 {
605         const char *basename = path_basename(path);
606         if (exclude_prefix) {
607                 wimlib_assert(strlen(path) >= config->prefix_len);
608                 if (memcmp(config->prefix, path, config->prefix_len) == 0
609                      && path[config->prefix_len] == '/')
610                         path += config->prefix_len;
611         }
612         return match_pattern(path, basename, &config->exclusion_list) &&
613                 !match_pattern(path, basename, &config->exclusion_exception);
614
615 }
616
/* Removes every leading and trailing forward slash from @target_path.
 *
 * Leading slashes are skipped by advancing the returned pointer; trailing
 * slashes are overwritten with NUL bytes in the caller's buffer.  A NULL
 * argument, or a string made up only of slashes, yields an empty string. */
static const char *canonicalize_target_path(char *target_path)
{
        size_t remaining;

        if (target_path == NULL)
                return "";

        while (*target_path == '/')
                target_path++;

        /* Whatever is left starts with a non-slash character (or is empty),
         * so the backwards scan stops before running off the front. */
        remaining = strlen(target_path);
        while (remaining > 0 && target_path[remaining - 1] == '/')
                target_path[--remaining] = '\0';

        return target_path;
}
638
639 /* Strip leading and trailing slashes from the target paths */
640 static void canonicalize_targets(struct wimlib_capture_source *sources,
641                                  size_t num_sources)
642 {
643         while (num_sources--) {
644                 DEBUG("Canonicalizing { source: \"%s\", target=\"%s\"}",
645                       sources->fs_source_path,
646                       sources->wim_target_path);
647                 sources->wim_target_path =
648                         (char*)canonicalize_target_path(sources->wim_target_path);
649                 DEBUG("Canonical target: \"%s\"", sources->wim_target_path);
650                 sources++;
651         }
652 }
653
654 static int capture_source_cmp(const void *p1, const void *p2)
655 {
656         const struct wimlib_capture_source *s1 = p1, *s2 = p2;
657         return strcmp(s1->wim_target_path, s2->wim_target_path);
658 }
659
660 /* Sorts the capture sources lexicographically by target path.  This occurs
661  * after leading and trailing forward slashes are stripped.
662  *
663  * One purpose of this is to make sure that target paths that are inside other
664  * target paths are extracted after the containing target paths. */
665 static void sort_sources(struct wimlib_capture_source *sources,
666                          size_t num_sources)
667 {
668         qsort(sources, num_sources, sizeof(sources[0]), capture_source_cmp);
669 }
670
671 static int check_sorted_sources(struct wimlib_capture_source *sources,
672                                 size_t num_sources, int add_image_flags)
673 {
674         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
675                 if (num_sources != 1) {
676                         ERROR("Must specify exactly 1 capture source "
677                               "(the NTFS volume) in NTFS mode!");
678                         return WIMLIB_ERR_INVALID_PARAM;
679                 }
680                 if (sources[0].wim_target_path[0] != '\0') {
681                         ERROR("In NTFS capture mode the target path inside "
682                               "the image must be the root directory!");
683                         return WIMLIB_ERR_INVALID_PARAM;
684                 }
685         } else if (num_sources != 0) {
686                 /* This code is disabled because the current code
687                  * unconditionally attempts to do overlays.  So, duplicate
688                  * target paths are OK. */
689         #if 0
690                 if (num_sources > 1 && sources[0].wim_target_path[0] == '\0') {
691                         ERROR("Cannot specify root target when using multiple "
692                               "capture sources!");
693                         return WIMLIB_ERR_INVALID_PARAM;
694                 }
695                 for (size_t i = 0; i < num_sources - 1; i++) {
696                         size_t len = strlen(sources[i].wim_target_path);
697                         size_t j = i + 1;
698                         const char *target1 = sources[i].wim_target_path;
699                         do {
700                                 const char *target2 = sources[j].wim_target_path;
701                                 DEBUG("target1=%s, target2=%s",
702                                       target1,target2);
703                                 if (strncmp(target1, target2, len) ||
704                                     target2[len] > '/')
705                                         break;
706                                 if (target2[len] == '/') {
707                                         ERROR("Invalid target `%s': is a prefix of `%s'",
708                                               target1, target2);
709                                         return WIMLIB_ERR_INVALID_PARAM;
710                                 }
711                                 if (target2[len] == '\0') {
712                                         ERROR("Invalid target `%s': is a duplicate of `%s'",
713                                               target1, target2);
714                                         return WIMLIB_ERR_INVALID_PARAM;
715                                 }
716                         } while (++j != num_sources);
717                 }
718         #endif
719         }
720         return 0;
721
722 }
723
724 /* Creates a new directory to place in the WIM image.  This is to create parent
725  * directories that are not part of any target as needed.  */
726 static struct wim_dentry *
727 new_filler_directory(const char *name)
728 {
729         struct wim_dentry *dentry;
730         DEBUG("Creating filler directory \"%s\"", name);
731         dentry = new_dentry_with_inode(name);
732         if (dentry) {
733                 /* Set the inode number to 0 for now.  The final inode number
734                  * will be assigned later by assign_inode_numbers(). */
735                 dentry->d_inode->i_ino = 0;
736                 dentry->d_inode->i_resolved = 1;
737                 dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
738         }
739         return dentry;
740 }
741
742 /* Transfers the children of @branch to @target.  It is an error if @target is
743  * not a directory or if both @branch and @target contain a child dentry with
744  * the same name. */
745 static int do_overlay(struct wim_dentry *target, struct wim_dentry *branch)
746 {
747         struct rb_root *rb_root;
748
749         if (!dentry_is_directory(target)) {
750                 ERROR("Cannot overlay directory `%s' over non-directory",
751                       branch->file_name_utf8);
752                 return WIMLIB_ERR_INVALID_OVERLAY;
753         }
754
755         rb_root = &branch->d_inode->i_children;
756         while (rb_root->rb_node) { /* While @branch has children... */
757                 struct wim_dentry *child = rbnode_dentry(rb_root->rb_node);
758                 /* Move @child to the directory @target */
759                 unlink_dentry(child);
760                 if (!dentry_add_child(target, child)) {
761                         /* Revert the change to avoid leaking the directory tree
762                          * rooted at @child */
763                         dentry_add_child(branch, child);
764                         ERROR("Overlay error: file `%s' already exists "
765                               "as a child of `%s'",
766                               child->file_name_utf8, target->file_name_utf8);
767                         return WIMLIB_ERR_INVALID_OVERLAY;
768                 }
769         }
770         return 0;
771
772 }
773
774 /* Attach or overlay a branch onto the WIM image.
775  *
776  * @root_p:
777  *      Pointer to the root of the WIM image, or pointer to NULL if it has not
778  *      been created yet.
779  * @branch
780  *      Branch to add.
781  * @target_path:
782  *      Path in the WIM image to add the branch, with leading and trailing
783  *      slashes stripped.
784  */
785 static int attach_branch(struct wim_dentry **root_p,
786                          struct wim_dentry *branch,
787                          char *target_path)
788 {
789         char *slash;
790         struct wim_dentry *dentry, *parent, *target;
791
792         if (*target_path == '\0') {
793                 /* Target: root directory */
794                 if (*root_p) {
795                         /* Overlay on existing root */
796                         return do_overlay(*root_p, branch);
797                 } else  {
798                         /* Set as root */
799                         *root_p = branch;
800                         return 0;
801                 }
802         }
803
804         /* Adding a non-root branch.  Create root if it hasn't been created
805          * already. */
806         if (!*root_p) {
807                 *root_p = new_filler_directory("");
808                 if (!*root_p)
809                         return WIMLIB_ERR_NOMEM;
810         }
811
812         /* Walk the path to the branch, creating filler directories as needed.
813          * */
814         parent = *root_p;
815         while ((slash = strchr(target_path, '/'))) {
816                 *slash = '\0';
817                 dentry = get_dentry_child_with_name(parent, target_path);
818                 if (!dentry) {
819                         dentry = new_filler_directory(target_path);
820                         if (!dentry)
821                                 return WIMLIB_ERR_NOMEM;
822                         dentry_add_child(parent, dentry);
823                 }
824                 parent = dentry;
825                 target_path = slash;
826                 /* Skip over slashes.  Note: this cannot overrun the length of
827                  * the string because the last character cannot be a slash, as
828                  * trailing slashes were tripped.  */
829                 do {
830                         ++target_path;
831                 } while (*target_path == '/');
832         }
833
834         /* If the target path already existed, overlay the branch onto it.
835          * Otherwise, set the branch as the target path. */
836         target = get_dentry_child_with_name(parent, branch->file_name_utf8);
837         if (target) {
838                 return do_overlay(target, branch);
839         } else {
840                 dentry_add_child(parent, branch);
841                 return 0;
842         }
843 }
844
845 WIMLIBAPI int wimlib_add_image_multisource(WIMStruct *w,
846                                            struct wimlib_capture_source *sources,
847                                            size_t num_sources,
848                                            const char *name,
849                                            const char *config_str,
850                                            size_t config_len,
851                                            int add_image_flags,
852                                            wimlib_progress_func_t progress_func)
853 {
854         int (*capture_tree)(struct wim_dentry **, const char *,
855                             struct wim_lookup_table *,
856                             struct wim_security_data *,
857                             const struct capture_config *,
858                             int, wimlib_progress_func_t, void *);
859         void *extra_arg;
860         struct wim_dentry *root_dentry;
861         struct wim_dentry *branch;
862         struct wim_security_data *sd;
863         struct capture_config config;
864         struct wim_image_metadata *imd;
865         int ret;
866
867         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
868 #ifdef WITH_NTFS_3G
869                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
870                         ERROR("Cannot dereference files when capturing directly from NTFS");
871                         return WIMLIB_ERR_INVALID_PARAM;
872                 }
873                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
874                         ERROR("Capturing UNIX owner and mode not supported "
875                               "when capturing directly from NTFS");
876                         return WIMLIB_ERR_INVALID_PARAM;
877                 }
878                 capture_tree = build_dentry_tree_ntfs;
879                 extra_arg = &w->ntfs_vol;
880 #else
881                 ERROR("wimlib was compiled without support for NTFS-3g, so\n"
882                       "        cannot capture a WIM image directly from a NTFS volume!");
883                 return WIMLIB_ERR_UNSUPPORTED;
884 #endif
885         } else {
886                 capture_tree = build_dentry_tree;
887                 extra_arg = NULL;
888         }
889
890         if (!name || !*name) {
891                 ERROR("Must specify a non-empty string for the image name");
892                 return WIMLIB_ERR_INVALID_PARAM;
893         }
894
895         if (w->hdr.total_parts != 1) {
896                 ERROR("Cannot add an image to a split WIM");
897                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
898         }
899
900         if (wimlib_image_name_in_use(w, name)) {
901                 ERROR("There is already an image named \"%s\" in `%s'",
902                       name, w->filename);
903                 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
904         }
905
906         if (!config_str) {
907                 DEBUG("Using default capture configuration");
908                 config_str = default_config;
909                 config_len = strlen(default_config);
910         }
911         ret = init_capture_config(&config, config_str, config_len);
912         if (ret)
913                 goto out;
914
915         DEBUG("Allocating security data");
916         sd = CALLOC(1, sizeof(struct wim_security_data));
917         if (!sd) {
918                 ret = WIMLIB_ERR_NOMEM;
919                 goto out_destroy_capture_config;
920         }
921         sd->total_length = 8;
922         sd->refcnt = 1;
923
924         DEBUG("Using %zu capture sources", num_sources);
925         canonicalize_targets(sources, num_sources);
926         sort_sources(sources, num_sources);
927         ret = check_sorted_sources(sources, num_sources, add_image_flags);
928         if (ret) {
929                 ret = WIMLIB_ERR_INVALID_PARAM;
930                 goto out_free_security_data;
931         }
932
933         DEBUG("Building dentry tree.");
934         if (num_sources == 0) {
935                 root_dentry = new_filler_directory("");
936                 if (!root_dentry)
937                         goto out_free_security_data;
938         } else {
939                 size_t i;
940
941                 root_dentry = NULL;
942                 i = 0;
943                 do {
944                         int flags;
945                         union wimlib_progress_info progress;
946
947                         DEBUG("Building dentry tree for source %zu of %zu "
948                               "(\"%s\" => \"%s\")", i + 1, num_sources,
949                               sources[i].fs_source_path,
950                               sources[i].wim_target_path);
951                         if (progress_func) {
952                                 memset(&progress, 0, sizeof(progress));
953                                 progress.scan.source = sources[i].fs_source_path;
954                                 progress.scan.wim_target_path = sources[i].wim_target_path;
955                                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_BEGIN, &progress);
956                         }
957                         ret = capture_config_set_prefix(&config,
958                                                         sources[i].fs_source_path);
959                         if (ret)
960                                 goto out_free_dentry_tree;
961                         flags = add_image_flags | WIMLIB_ADD_IMAGE_FLAG_SOURCE;
962                         if (!*sources[i].wim_target_path)
963                                 flags |= WIMLIB_ADD_IMAGE_FLAG_ROOT;
964                         ret = (*capture_tree)(&branch, sources[i].fs_source_path,
965                                               w->lookup_table, sd,
966                                               &config,
967                                               flags,
968                                               progress_func, extra_arg);
969                         if (ret) {
970                                 ERROR("Failed to build dentry tree for `%s'",
971                                       sources[i].fs_source_path);
972                                 goto out_free_dentry_tree;
973                         }
974                         if (branch) {
975                                 /* Use the target name, not the source name, for
976                                  * the root of each branch from a capture
977                                  * source.  (This will also set the root dentry
978                                  * of the entire image to be unnamed.) */
979                                 ret = set_dentry_name(branch,
980                                                       path_basename(sources[i].wim_target_path));
981                                 if (ret)
982                                         goto out_free_branch;
983
984                                 ret = attach_branch(&root_dentry, branch,
985                                                     sources[i].wim_target_path);
986                                 if (ret)
987                                         goto out_free_branch;
988                         }
989                         if (progress_func)
990                                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_END, &progress);
991                 } while (++i != num_sources);
992         }
993
994         DEBUG("Calculating full paths of dentries.");
995         ret = for_dentry_in_tree(root_dentry, calculate_dentry_full_path, NULL);
996         if (ret != 0)
997                 goto out_free_dentry_tree;
998
999         ret = add_new_dentry_tree(w, root_dentry, sd);
1000         if (ret != 0)
1001                 goto out_free_dentry_tree;
1002
1003         imd = &w->image_metadata[w->hdr.image_count - 1];
1004
1005         ret = dentry_tree_fix_inodes(root_dentry, &imd->inode_list);
1006         if (ret != 0)
1007                 goto out_destroy_imd;
1008
1009         DEBUG("Assigning hard link group IDs");
1010         assign_inode_numbers(&imd->inode_list);
1011
1012         ret = xml_add_image(w, name);
1013         if (ret != 0)
1014                 goto out_destroy_imd;
1015
1016         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_BOOT)
1017                 wimlib_set_boot_idx(w, w->hdr.image_count);
1018         ret = 0;
1019         goto out;
1020 out_destroy_imd:
1021         destroy_image_metadata(&w->image_metadata[w->hdr.image_count - 1],
1022                                w->lookup_table);
1023         w->hdr.image_count--;
1024         goto out;
1025 out_free_branch:
1026         free_dentry_tree(branch, w->lookup_table);
1027 out_free_dentry_tree:
1028         free_dentry_tree(root_dentry, w->lookup_table);
1029 out_free_security_data:
1030         free_security_data(sd);
1031 out_destroy_capture_config:
1032         destroy_capture_config(&config);
1033 out:
1034         return ret;
1035 }
1036
1037 WIMLIBAPI int wimlib_add_image(WIMStruct *w, const char *source,
1038                                const char *name, const char *config_str,
1039                                size_t config_len, int add_image_flags,
1040                                wimlib_progress_func_t progress_func)
1041 {
1042         if (!source || !*source)
1043                 return WIMLIB_ERR_INVALID_PARAM;
1044
1045         char *fs_source_path = STRDUP(source);
1046         int ret;
1047         struct wimlib_capture_source capture_src = {
1048                 .fs_source_path = fs_source_path,
1049                 .wim_target_path = NULL,
1050                 .reserved = 0,
1051         };
1052         ret = wimlib_add_image_multisource(w, &capture_src, 1, name,
1053                                            config_str, config_len,
1054                                            add_image_flags, progress_func);
1055         FREE(fs_source_path);
1056         return ret;
1057 }