]> wimlib.net Git - wimlib/blob - src/add_image.c
Modify treatment of metadata entries
[wimlib] / src / add_image.c
1 /*
2  * add_image.c
3  */
4
5 /*
6  * Copyright (C) 2012, 2013 Eric Biggers
7  *
8  * This file is part of wimlib, a library for working with WIM files.
9  *
10  * wimlib is free software; you can redistribute it and/or modify it under the
11  * terms of the GNU General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option)
13  * any later version.
14  *
15  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17  * A PARTICULAR PURPOSE. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with wimlib; if not, see http://www.gnu.org/licenses/.
22  */
23
24 #include "config.h"
25
26 #ifdef __WIN32__
27 #  include "win32.h"
28 #else
29 #  include <dirent.h>
30 #  include <sys/stat.h>
31 #  include <fnmatch.h>
32 #  include "timestamp.h"
33 #endif
34
35 #include "wimlib_internal.h"
36 #include "dentry.h"
37 #include "lookup_table.h"
38 #include "xml.h"
39 #include "security.h"
40
41 #include <ctype.h>
42 #include <errno.h>
43 #include <stdlib.h>
44 #include <string.h>
45
46 #include <unistd.h>
47
48 #ifdef HAVE_ALLOCA_H
49 #  include <alloca.h>
50 #endif
51
52 /*
53  * Adds the dentry tree and security data for a new image to the image metadata
54  * array of the WIMStruct.
55  */
56 int
57 add_new_dentry_tree(WIMStruct *w, struct wim_dentry *root_dentry,
58                     struct wim_security_data *sd)
59 {
60         struct wim_lookup_table_entry *metadata_lte;
61         struct wim_image_metadata *imd;
62         struct wim_image_metadata *new_imd;
63
64         wimlib_assert(root_dentry != NULL);
65
66         DEBUG("Reallocating image metadata array for image_count = %u",
67               w->hdr.image_count + 1);
68         imd = CALLOC((w->hdr.image_count + 1), sizeof(struct wim_image_metadata));
69
70         if (!imd) {
71                 ERROR("Failed to allocate memory for new image metadata array");
72                 goto err;
73         }
74
75         memcpy(imd, w->image_metadata,
76                w->hdr.image_count * sizeof(struct wim_image_metadata));
77
78         metadata_lte = new_lookup_table_entry();
79         if (!metadata_lte)
80                 goto err_free_imd;
81
82         metadata_lte->resource_entry.flags = WIM_RESHDR_FLAG_METADATA;
83
84         new_imd = &imd[w->hdr.image_count];
85
86         new_imd->root_dentry    = root_dentry;
87         new_imd->metadata_lte   = metadata_lte;
88         new_imd->security_data  = sd;
89         new_imd->modified       = 1;
90
91         FREE(w->image_metadata);
92         w->image_metadata = imd;
93         w->hdr.image_count++;
94         return 0;
95 err_free_imd:
96         FREE(imd);
97 err:
98         return WIMLIB_ERR_NOMEM;
99
100 }
101
102 #ifndef __WIN32__
103 /*
104  * unix_build_dentry_tree():
105  *      Recursively builds a tree of WIM dentries from an on-disk directory
106  *      tree (UNIX version; no NTFS-specific data is captured).
107  *
108  * @root_ret:   Place to return a pointer to the root of the dentry tree.  Only
109  *              modified if successful.  Set to NULL if the file or directory was
110  *              excluded from capture.
111  *
112  * @root_disk_path:  The path to the root of the directory tree on disk.
113  *
114  * @lookup_table: The lookup table for the WIM file.  For each file added to the
115  *              dentry tree being built, an entry is added to the lookup table,
116  *              unless an identical stream is already in the lookup table.
117  *              These lookup table entries that are added point to the path of
118  *              the file on disk.
119  *
120  * @sd_set:     Ignored.  (Security data only captured in NTFS mode.)
121  *
122  * @capture_config:
123  *              Configuration for files to be excluded from capture.
124  *
125  * @add_flags:  Bitwise or of WIMLIB_ADD_IMAGE_FLAG_*
126  *
127  * @extra_arg:  Ignored
128  *
129  * @return:     0 on success, nonzero on failure.  It is a failure if any of
130  *              the files cannot be `stat'ed, or if any of the needed
131  *              directories cannot be opened or read.  Failure to add the files
132  *              to the WIM may still occur later when trying to actually read
133  *              the on-disk files during a call to wimlib_write() or
134  *              wimlib_overwrite().
135  */
136 static int
137 unix_build_dentry_tree(struct wim_dentry **root_ret,
138                        const char *root_disk_path,
139                        struct wim_lookup_table *lookup_table,
140                        struct sd_set *sd_set,
141                        const struct capture_config *config,
142                        int add_image_flags,
143                        wimlib_progress_func_t progress_func,
144                        void *extra_arg)
145 {
146         struct wim_dentry *root = NULL;
147         int ret = 0;
148         struct wim_inode *inode;
149
150         if (exclude_path(root_disk_path, config, true)) {
151                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) {
152                         ERROR("Cannot exclude the root directory from capture");
153                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
154                         goto out;
155                 }
156                 if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
157                     && progress_func)
158                 {
159                         union wimlib_progress_info info;
160                         info.scan.cur_path = root_disk_path;
161                         info.scan.excluded = true;
162                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
163                 }
164                 goto out;
165         }
166
167         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
168             && progress_func)
169         {
170                 union wimlib_progress_info info;
171                 info.scan.cur_path = root_disk_path;
172                 info.scan.excluded = false;
173                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
174         }
175
176         /* UNIX version of capturing a directory tree */
177         struct stat root_stbuf;
178         int (*stat_fn)(const char *restrict, struct stat *restrict);
179         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE)
180                 stat_fn = stat;
181         else
182                 stat_fn = lstat;
183
184         ret = (*stat_fn)(root_disk_path, &root_stbuf);
185         if (ret != 0) {
186                 ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
187                 goto out;
188         }
189
190         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) &&
191               !S_ISDIR(root_stbuf.st_mode))
192         {
193                 /* Do a dereference-stat in case the root is a symbolic link.
194                  * This case is allowed, provided that the symbolic link points
195                  * to a directory. */
196                 ret = stat(root_disk_path, &root_stbuf);
197                 if (ret != 0) {
198                         ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
199                         ret = WIMLIB_ERR_STAT;
200                         goto out;
201                 }
202                 if (!S_ISDIR(root_stbuf.st_mode)) {
203                         ERROR("`%s' is not a directory", root_disk_path);
204                         ret = WIMLIB_ERR_NOTDIR;
205                         goto out;
206                 }
207         }
208         if (!S_ISREG(root_stbuf.st_mode) && !S_ISDIR(root_stbuf.st_mode)
209             && !S_ISLNK(root_stbuf.st_mode)) {
210                 ERROR("`%s' is not a regular file, directory, or symbolic link.",
211                       root_disk_path);
212                 ret = WIMLIB_ERR_SPECIAL_FILE;
213                 goto out;
214         }
215
216         ret = new_dentry_with_timeless_inode(path_basename(root_disk_path),
217                                              &root);
218         if (ret)
219                 goto out;
220
221         inode = root->d_inode;
222
223 #ifdef HAVE_STAT_NANOSECOND_PRECISION
224         inode->i_creation_time = timespec_to_wim_timestamp(root_stbuf.st_mtim);
225         inode->i_last_write_time = timespec_to_wim_timestamp(root_stbuf.st_mtim);
226         inode->i_last_access_time = timespec_to_wim_timestamp(root_stbuf.st_atim);
227 #else
228         inode->i_creation_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
229         inode->i_last_write_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
230         inode->i_last_access_time = unix_timestamp_to_wim(root_stbuf.st_atime);
231 #endif
232         /* Leave the inode number at 0 for directories. */
233         if (!S_ISDIR(root_stbuf.st_mode)) {
234                 if (sizeof(ino_t) >= 8)
235                         inode->i_ino = (u64)root_stbuf.st_ino;
236                 else
237                         inode->i_ino = (u64)root_stbuf.st_ino |
238                                            ((u64)root_stbuf.st_dev <<
239                                                 ((sizeof(ino_t) * 8) & 63));
240         }
241         inode->i_resolved = 1;
242         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
243                 ret = inode_set_unix_data(inode, root_stbuf.st_uid,
244                                           root_stbuf.st_gid,
245                                           root_stbuf.st_mode,
246                                           lookup_table,
247                                           UNIX_DATA_ALL | UNIX_DATA_CREATE);
248                 if (ret)
249                         goto out;
250         }
251         add_image_flags &= ~(WIMLIB_ADD_IMAGE_FLAG_ROOT | WIMLIB_ADD_IMAGE_FLAG_SOURCE);
252         if (S_ISREG(root_stbuf.st_mode)) { /* Archiving a regular file */
253
254                 struct wim_lookup_table_entry *lte;
255                 u8 hash[SHA1_HASH_SIZE];
256
257                 inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
258
259                 /* Empty files do not have to have a lookup table entry. */
260                 if (root_stbuf.st_size == 0)
261                         goto out;
262
263                 /* For each regular file, we must check to see if the file is in
264                  * the lookup table already; if it is, we increment its refcnt;
265                  * otherwise, we create a new lookup table entry and insert it.
266                  * */
267
268                 ret = sha1sum(root_disk_path, hash);
269                 if (ret != 0)
270                         goto out;
271
272                 lte = __lookup_resource(lookup_table, hash);
273                 if (lte) {
274                         lte->refcnt++;
275                         DEBUG("Add lte reference %u for `%s'", lte->refcnt,
276                               root_disk_path);
277                 } else {
278                         char *file_on_disk = STRDUP(root_disk_path);
279                         if (!file_on_disk) {
280                                 ERROR("Failed to allocate memory for file path");
281                                 ret = WIMLIB_ERR_NOMEM;
282                                 goto out;
283                         }
284                         lte = new_lookup_table_entry();
285                         if (!lte) {
286                                 FREE(file_on_disk);
287                                 ret = WIMLIB_ERR_NOMEM;
288                                 goto out;
289                         }
290                         lte->file_on_disk = file_on_disk;
291                         lte->resource_location = RESOURCE_IN_FILE_ON_DISK;
292                         lte->resource_entry.original_size = root_stbuf.st_size;
293                         lte->resource_entry.size = root_stbuf.st_size;
294                         copy_hash(lte->hash, hash);
295                         lookup_table_insert(lookup_table, lte);
296                 }
297                 root->d_inode->i_lte = lte;
298         } else if (S_ISDIR(root_stbuf.st_mode)) { /* Archiving a directory */
299
300                 inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
301
302                 DIR *dir;
303                 struct dirent entry, *result;
304                 struct wim_dentry *child;
305
306                 dir = opendir(root_disk_path);
307                 if (!dir) {
308                         ERROR_WITH_ERRNO("Failed to open the directory `%s'",
309                                          root_disk_path);
310                         ret = WIMLIB_ERR_OPEN;
311                         goto out;
312                 }
313
314                 /* Buffer for names of files in directory. */
315                 size_t len = strlen(root_disk_path);
316                 char name[len + 1 + FILENAME_MAX + 1];
317                 memcpy(name, root_disk_path, len);
318                 name[len] = '/';
319
320                 /* Create a dentry for each entry in the directory on disk, and recurse
321                  * to any subdirectories. */
322                 while (1) {
323                         errno = 0;
324                         ret = readdir_r(dir, &entry, &result);
325                         if (ret != 0) {
326                                 ret = WIMLIB_ERR_READ;
327                                 ERROR_WITH_ERRNO("Error reading the "
328                                                  "directory `%s'",
329                                                  root_disk_path);
330                                 break;
331                         }
332                         if (result == NULL)
333                                 break;
334                         if (result->d_name[0] == '.' && (result->d_name[1] == '\0'
335                               || (result->d_name[1] == '.' && result->d_name[2] == '\0')))
336                                         continue;
337                         strcpy(name + len + 1, result->d_name);
338                         ret = unix_build_dentry_tree(&child, name,
339                                                      lookup_table,
340                                                      NULL, config,
341                                                      add_image_flags,
342                                                      progress_func, NULL);
343                         if (ret != 0)
344                                 break;
345                         if (child)
346                                 dentry_add_child(root, child);
347                 }
348                 closedir(dir);
349         } else { /* Archiving a symbolic link */
350                 inode->i_attributes = FILE_ATTRIBUTE_REPARSE_POINT;
351                 inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK;
352
353                 /* The idea here is to call readlink() to get the UNIX target of
354                  * the symbolic link, then turn the target into a reparse point
355                  * data buffer that contains a relative or absolute symbolic
356                  * link (NOT a junction point or *full* path symbolic link with
357                  * drive letter).
358                  */
359
360                 char deref_name_buf[4096];
361                 ssize_t deref_name_len;
362
363                 deref_name_len = readlink(root_disk_path, deref_name_buf,
364                                           sizeof(deref_name_buf) - 1);
365                 if (deref_name_len >= 0) {
366                         deref_name_buf[deref_name_len] = '\0';
367                         DEBUG("Read symlink `%s'", deref_name_buf);
368                         ret = inode_set_symlink(root->d_inode, deref_name_buf,
369                                                 lookup_table, NULL);
370                         if (ret == 0) {
371                                 /*
372                                  * Unfortunately, Windows seems to have the
373                                  * concept of "file" symbolic links as being
374                                  * different from "directory" symbolic links...
375                                  * so FILE_ATTRIBUTE_DIRECTORY needs to be set
376                                  * on the symbolic link if the *target* of the
377                                  * symbolic link is a directory.
378                                  */
379                                 struct stat stbuf;
380                                 if (stat(root_disk_path, &stbuf) == 0 &&
381                                     S_ISDIR(stbuf.st_mode))
382                                 {
383                                         inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
384                                 }
385                         }
386                 } else {
387                         ERROR_WITH_ERRNO("Failed to read target of "
388                                          "symbolic link `%s'", root_disk_path);
389                         ret = WIMLIB_ERR_READLINK;
390                 }
391         }
392 out:
393         if (ret == 0)
394                 *root_ret = root;
395         else
396                 free_dentry_tree(root, lookup_table);
397         return ret;
398 }
399 #endif /* !__WIN32__ */
400
/* Identifies which section of the capture configuration file a pattern
 * belongs to.  NONE means that no section header has been seen yet. */
enum pattern_type {
        NONE                       = 0,
        EXCLUSION_LIST             = 1, /* [ExclusionList] */
        EXCLUSION_EXCEPTION        = 2, /* [ExclusionException] */
        COMPRESSION_EXCLUSION_LIST = 3, /* [CompressionExclusionList] */
        ALIGNMENT_LIST             = 4, /* [AlignmentList] */
};
408
409 #define COMPAT_DEFAULT_CONFIG
410
411 /* Default capture configuration file when none is specified. */
412 static const tchar *default_config =
413 #ifdef COMPAT_DEFAULT_CONFIG /* XXX: This policy is being moved to library
414                                 users.  The next ABI-incompatible library
415                                 version will default to the empty string here. */
416 T(
417 "[ExclusionList]\n"
418 "\\$ntfs.log\n"
419 "\\hiberfil.sys\n"
420 "\\pagefile.sys\n"
421 "\\System Volume Information\n"
422 "\\RECYCLER\n"
423 "\\Windows\\CSC\n"
424 "\n"
425 "[CompressionExclusionList]\n"
426 "*.mp3\n"
427 "*.zip\n"
428 "*.cab\n"
429 "\\WINDOWS\\inf\\*.pnf\n"
430 );
431 #else
432 T("");
433 #endif
434
435 static void
436 destroy_pattern_list(struct pattern_list *list)
437 {
438         FREE(list->pats);
439 }
440
441 static void
442 destroy_capture_config(struct capture_config *config)
443 {
444         destroy_pattern_list(&config->exclusion_list);
445         destroy_pattern_list(&config->exclusion_exception);
446         destroy_pattern_list(&config->compression_exclusion_list);
447         destroy_pattern_list(&config->alignment_list);
448         FREE(config->config_str);
449         FREE(config->prefix);
450         memset(config, 0, sizeof(*config));
451 }
452
453 static int
454 pattern_list_add_pattern(struct pattern_list *list, const tchar *pattern)
455 {
456         const tchar **pats;
457         if (list->num_pats >= list->num_allocated_pats) {
458                 pats = REALLOC(list->pats,
459                                sizeof(list->pats[0]) * (list->num_allocated_pats + 8));
460                 if (!pats)
461                         return WIMLIB_ERR_NOMEM;
462                 list->num_allocated_pats += 8;
463                 list->pats = pats;
464         }
465         list->pats[list->num_pats++] = pattern;
466         return 0;
467 }
468
469 /* Parses the contents of the image capture configuration file and fills in a
470  * `struct capture_config'. */
471 static int
472 init_capture_config(struct capture_config *config,
473                     const tchar *_config_str,
474                     size_t config_num_tchars)
475 {
476         tchar *config_str;
477         tchar *p;
478         tchar *eol;
479         tchar *next_p;
480         size_t num_tchars_remaining;
481         enum pattern_type type = NONE;
482         int ret;
483         unsigned long line_no = 0;
484
485         DEBUG("config_num_tchars = %zu", config_num_tchars);
486         num_tchars_remaining = config_num_tchars;
487         memset(config, 0, sizeof(*config));
488         config_str = TMALLOC(config_num_tchars);
489         if (!config_str) {
490                 ERROR("Could not duplicate capture config string");
491                 return WIMLIB_ERR_NOMEM;
492         }
493
494         tmemcpy(config_str, _config_str, config_num_tchars);
495         next_p = config_str;
496         config->config_str = config_str;
497         while (num_tchars_remaining != 0) {
498                 line_no++;
499                 p = next_p;
500                 eol = tmemchr(p, T('\n'), num_tchars_remaining);
501                 if (!eol) {
502                         ERROR("Expected end-of-line in capture config file on "
503                               "line %lu", line_no);
504                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
505                         goto out_destroy;
506                 }
507
508                 next_p = eol + 1;
509                 num_tchars_remaining -= (next_p - p);
510                 if (eol == p)
511                         continue;
512
513                 if (*(eol - 1) == T('\r'))
514                         eol--;
515                 *eol = T('\0');
516
517                 /* Translate backslash to forward slash */
518                 for (tchar *pp = p; pp != eol; pp++)
519                         if (*pp == T('\\'))
520                                 *pp = T('/');
521
522                 /* Remove drive letter (UNIX only) */
523         #ifndef __WIN32__
524                 if (eol - p > 2 && istalpha(*p) && *(p + 1) == T(':'))
525                         p += 2;
526         #endif
527
528                 ret = 0;
529                 if (!tstrcmp(p, T("[ExclusionList]")))
530                         type = EXCLUSION_LIST;
531                 else if (!tstrcmp(p, T("[ExclusionException]")))
532                         type = EXCLUSION_EXCEPTION;
533                 else if (!tstrcmp(p, T("[CompressionExclusionList]")))
534                         type = COMPRESSION_EXCLUSION_LIST;
535                 else if (!tstrcmp(p, T("[AlignmentList]")))
536                         type = ALIGNMENT_LIST;
537                 else if (p[0] == T('[') && tstrrchr(p, T(']'))) {
538                         ERROR("Unknown capture configuration section \"%"TS"\"", p);
539                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
540                 } else switch (type) {
541                 case EXCLUSION_LIST:
542                         DEBUG("Adding pattern \"%"TS"\" to exclusion list", p);
543                         ret = pattern_list_add_pattern(&config->exclusion_list, p);
544                         break;
545                 case EXCLUSION_EXCEPTION:
546                         DEBUG("Adding pattern \"%"TS"\" to exclusion exception list", p);
547                         ret = pattern_list_add_pattern(&config->exclusion_exception, p);
548                         break;
549                 case COMPRESSION_EXCLUSION_LIST:
550                         DEBUG("Adding pattern \"%"TS"\" to compression exclusion list", p);
551                         ret = pattern_list_add_pattern(&config->compression_exclusion_list, p);
552                         break;
553                 case ALIGNMENT_LIST:
554                         DEBUG("Adding pattern \"%"TS"\" to alignment list", p);
555                         ret = pattern_list_add_pattern(&config->alignment_list, p);
556                         break;
557                 default:
558                         ERROR("Line %lu of capture configuration is not "
559                               "in a block (such as [ExclusionList])",
560                               line_no);
561                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
562                         break;
563                 }
564                 if (ret != 0)
565                         goto out_destroy;
566         }
567         return 0;
568 out_destroy:
569         destroy_capture_config(config);
570         return ret;
571 }
572
573 static int capture_config_set_prefix(struct capture_config *config,
574                                      const tchar *_prefix)
575 {
576         tchar *prefix = TSTRDUP(_prefix);
577
578         if (!prefix)
579                 return WIMLIB_ERR_NOMEM;
580         FREE(config->prefix);
581         config->prefix = prefix;
582         config->prefix_num_tchars = tstrlen(prefix);
583         return 0;
584 }
585
586 static bool match_pattern(const tchar *path,
587                           const tchar *path_basename,
588                           const struct pattern_list *list)
589 {
590         for (size_t i = 0; i < list->num_pats; i++) {
591                 const tchar *pat = list->pats[i];
592                 const tchar *string;
593                 if (pat[0] == '/')
594                         /* Absolute path from root of capture */
595                         string = path;
596                 else {
597                         if (tstrchr(pat, T('/')))
598                                 /* Relative path from root of capture */
599                                 string = path + 1;
600                         else
601                                 /* A file name pattern */
602                                 string = path_basename;
603                 }
604
605                 /* Warning: on Windows native builds, fnmatch() calls the
606                  * replacement function in win32.c. */
607                 if (fnmatch(pat, string, FNM_PATHNAME
608                                 #ifdef FNM_CASEFOLD
609                                         | FNM_CASEFOLD
610                                 #endif
611                             ) == 0)
612                 {
613                         DEBUG("\"%"TS"\" matches the pattern \"%"TS"\"",
614                               string, pat);
615                         return true;
616                 }
617         }
618         return false;
619 }
620
621 /* Return true if the image capture configuration file indicates we should
622  * exclude the filename @path from capture.
623  *
624  * If @exclude_prefix is %true, the part of the path up and including the name
625  * of the directory being captured is not included in the path for matching
626  * purposes.  This allows, for example, a pattern like /hiberfil.sys to match a
627  * file /mnt/windows7/hiberfil.sys if we are capturing the /mnt/windows7
628  * directory.
629  */
630 bool
631 exclude_path(const tchar *path, const struct capture_config *config,
632              bool exclude_prefix)
633 {
634         const tchar *basename = path_basename(path);
635         if (exclude_prefix) {
636                 wimlib_assert(tstrlen(path) >= config->prefix_num_tchars);
637                 if (!tmemcmp(config->prefix, path, config->prefix_num_tchars) &&
638                     path[config->prefix_num_tchars] == T('/'))
639                 {
640                         path += config->prefix_num_tchars;
641                 }
642         }
643         return match_pattern(path, basename, &config->exclusion_list) &&
644                 !match_pattern(path, basename, &config->exclusion_exception);
645
646 }
647
648 /* Strip leading and trailing forward slashes from a string.  Modifies it in
649  * place and returns the stripped string. */
650 static const tchar *
651 canonicalize_target_path(tchar *target_path)
652 {
653         tchar *p;
654         if (target_path == NULL)
655                 return T("");
656         for (;;) {
657                 if (*target_path == T('\0'))
658                         return target_path;
659                 else if (*target_path == T('/'))
660                         target_path++;
661                 else
662                         break;
663         }
664
665         p = tstrchr(target_path, T('\0')) - 1;
666         while (*p == T('/'))
667                 *p-- = T('\0');
668         return target_path;
669 }
670
671 /* Strip leading and trailing slashes from the target paths */
672 static void
673 canonicalize_targets(struct wimlib_capture_source *sources, size_t num_sources)
674 {
675         while (num_sources--) {
676                 DEBUG("Canonicalizing { source: \"%"TS"\", target=\"%"TS"\"}",
677                       sources->fs_source_path,
678                       sources->wim_target_path);
679
680                 /* The Windows API can handle forward slashes.  Just get rid of
681                  * backslashes to avoid confusing other parts of the library
682                  * code. */
683                 zap_backslashes(sources->fs_source_path);
684                 if (sources->wim_target_path)
685                         zap_backslashes(sources->wim_target_path);
686
687                 sources->wim_target_path =
688                         (tchar*)canonicalize_target_path(sources->wim_target_path);
689                 DEBUG("Canonical target: \"%"TS"\"", sources->wim_target_path);
690                 sources++;
691         }
692 }
693
694 static int
695 capture_source_cmp(const void *p1, const void *p2)
696 {
697         const struct wimlib_capture_source *s1 = p1, *s2 = p2;
698         return tstrcmp(s1->wim_target_path, s2->wim_target_path);
699 }
700
701 /* Sorts the capture sources lexicographically by target path.  This occurs
702  * after leading and trailing forward slashes are stripped.
703  *
704  * One purpose of this is to make sure that target paths that are inside other
705  * target paths are added after the containing target paths. */
706 static void
707 sort_sources(struct wimlib_capture_source *sources, size_t num_sources)
708 {
709         qsort(sources, num_sources, sizeof(sources[0]), capture_source_cmp);
710 }
711
712 static int
713 check_sorted_sources(struct wimlib_capture_source *sources, size_t num_sources,
714                      int add_image_flags)
715 {
716         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
717                 if (num_sources != 1) {
718                         ERROR("Must specify exactly 1 capture source "
719                               "(the NTFS volume) in NTFS mode!");
720                         return WIMLIB_ERR_INVALID_PARAM;
721                 }
722                 if (sources[0].wim_target_path[0] != T('\0')) {
723                         ERROR("In NTFS capture mode the target path inside "
724                               "the image must be the root directory!");
725                         return WIMLIB_ERR_INVALID_PARAM;
726                 }
727         } else if (num_sources != 0) {
728                 /* This code is disabled because the current code
729                  * unconditionally attempts to do overlays.  So, duplicate
730                  * target paths are OK. */
731         #if 0
732                 if (num_sources > 1 && sources[0].wim_target_path[0] == '\0') {
733                         ERROR("Cannot specify root target when using multiple "
734                               "capture sources!");
735                         return WIMLIB_ERR_INVALID_PARAM;
736                 }
737                 for (size_t i = 0; i < num_sources - 1; i++) {
738                         size_t len = strlen(sources[i].wim_target_path);
739                         size_t j = i + 1;
740                         const char *target1 = sources[i].wim_target_path;
741                         do {
742                                 const char *target2 = sources[j].wim_target_path;
743                                 DEBUG("target1=%s, target2=%s",
744                                       target1,target2);
745                                 if (strncmp(target1, target2, len) ||
746                                     target2[len] > '/')
747                                         break;
748                                 if (target2[len] == '/') {
749                                         ERROR("Invalid target `%s': is a prefix of `%s'",
750                                               target1, target2);
751                                         return WIMLIB_ERR_INVALID_PARAM;
752                                 }
753                                 if (target2[len] == '\0') {
754                                         ERROR("Invalid target `%s': is a duplicate of `%s'",
755                                               target1, target2);
756                                         return WIMLIB_ERR_INVALID_PARAM;
757                                 }
758                         } while (++j != num_sources);
759                 }
760         #endif
761         }
762         return 0;
763
764 }
765
766 /* Creates a new directory to place in the WIM image.  This is to create parent
767  * directories that are not part of any target as needed.  */
768 static int
769 new_filler_directory(const tchar *name, struct wim_dentry **dentry_ret)
770 {
771         int ret;
772         struct wim_dentry *dentry;
773
774         DEBUG("Creating filler directory \"%"TS"\"", name);
775         ret = new_dentry_with_inode(name, &dentry);
776         if (ret == 0) {
777                 /* Leave the inode number as 0 for now.  The final inode number
778                  * will be assigned later by assign_inode_numbers(). */
779                 dentry->d_inode->i_resolved = 1;
780                 dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
781                 *dentry_ret = dentry;
782         }
783         return ret;
784 }
785
786 /* Transfers the children of @branch to @target.  It is an error if @target is
787  * not a directory or if both @branch and @target contain a child dentry with
788  * the same name. */
789 static int
790 do_overlay(struct wim_dentry *target, struct wim_dentry *branch)
791 {
792         struct rb_root *rb_root;
793
794         DEBUG("Doing overlay \"%"WS"\" => \"%"WS"\"",
795               branch->file_name, target->file_name);
796
797         if (!dentry_is_directory(target)) {
798                 ERROR("Cannot overlay directory \"%"WS"\" "
799                       "over non-directory", branch->file_name);
800                 return WIMLIB_ERR_INVALID_OVERLAY;
801         }
802
803         rb_root = &branch->d_inode->i_children;
804         while (rb_root->rb_node) { /* While @branch has children... */
805                 struct wim_dentry *child = rbnode_dentry(rb_root->rb_node);
806                 /* Move @child to the directory @target */
807                 unlink_dentry(child);
808                 if (!dentry_add_child(target, child)) {
809                         /* Revert the change to avoid leaking the directory tree
810                          * rooted at @child */
811                         dentry_add_child(branch, child);
812                         ERROR("Overlay error: file \"%"WS"\" already exists "
813                               "as a child of \"%"WS"\"",
814                               child->file_name, target->file_name);
815                         return WIMLIB_ERR_INVALID_OVERLAY;
816                 }
817         }
818         free_dentry(branch);
819         return 0;
820
821 }
822
823 /* Attach or overlay a branch onto the WIM image.
824  *
825  * @root_p:
826  *      Pointer to the root of the WIM image, or pointer to NULL if it has not
827  *      been created yet.
828  * @branch
829  *      Branch to add.
830  * @target_path:
831  *      Path in the WIM image to add the branch, with leading and trailing
832  *      slashes stripped.
833  */
834 static int
835 attach_branch(struct wim_dentry **root_p, struct wim_dentry *branch,
836               tchar *target_path)
837 {
838         tchar *slash;
839         struct wim_dentry *dentry, *parent, *target;
840         int ret;
841
842         DEBUG("Attaching branch \"%"WS"\" => \"%"TS"\"",
843               branch->file_name, target_path);
844
845         if (*target_path == T('\0')) {
846                 /* Target: root directory */
847                 if (*root_p) {
848                         /* Overlay on existing root */
849                         return do_overlay(*root_p, branch);
850                 } else  {
851                         /* Set as root */
852                         *root_p = branch;
853                         return 0;
854                 }
855         }
856
857         /* Adding a non-root branch.  Create root if it hasn't been created
858          * already. */
859         if (!*root_p) {
860                 ret  = new_filler_directory(T(""), root_p);
861                 if (ret)
862                         return ret;
863         }
864
865         /* Walk the path to the branch, creating filler directories as needed.
866          * */
867         parent = *root_p;
868         while ((slash = tstrchr(target_path, T('/')))) {
869                 *slash = T('\0');
870                 dentry = get_dentry_child_with_name(parent, target_path);
871                 if (!dentry) {
872                         ret = new_filler_directory(target_path, &dentry);
873                         if (ret)
874                                 return ret;
875                         dentry_add_child(parent, dentry);
876                 }
877                 parent = dentry;
878                 target_path = slash;
879                 /* Skip over slashes.  Note: this cannot overrun the length of
880                  * the string because the last character cannot be a slash, as
881                  * trailing slashes were tripped.  */
882                 do {
883                         ++target_path;
884                 } while (*target_path == T('/'));
885         }
886
887         /* If the target path already existed, overlay the branch onto it.
888          * Otherwise, set the branch as the target path. */
889         target = get_dentry_child_with_utf16le_name(parent, branch->file_name,
890                                                     branch->file_name_nbytes);
891         if (target) {
892                 return do_overlay(target, branch);
893         } else {
894                 dentry_add_child(parent, branch);
895                 return 0;
896         }
897 }
898
899 WIMLIBAPI int
900 wimlib_add_image_multisource(WIMStruct *w,
901                              struct wimlib_capture_source *sources,
902                              size_t num_sources,
903                              const tchar *name,
904                              const tchar *config_str,
905                              size_t config_len,
906                              int add_image_flags,
907                              wimlib_progress_func_t progress_func)
908 {
909         int (*capture_tree)(struct wim_dentry **,
910                             const tchar *,
911                             struct wim_lookup_table *,
912                             struct sd_set *,
913                             const struct capture_config *,
914                             int,
915                             wimlib_progress_func_t,
916                             void *);
917         void *extra_arg;
918         struct wim_dentry *root_dentry;
919         struct wim_dentry *branch;
920         struct wim_security_data *sd;
921         struct capture_config config;
922         struct wim_image_metadata *imd;
923         int ret;
924         struct sd_set sd_set;
925
926         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
927 #ifdef WITH_NTFS_3G
928                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
929                         ERROR("Cannot dereference files when capturing directly from NTFS");
930                         return WIMLIB_ERR_INVALID_PARAM;
931                 }
932                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
933                         ERROR("Capturing UNIX owner and mode not supported "
934                               "when capturing directly from NTFS");
935                         return WIMLIB_ERR_INVALID_PARAM;
936                 }
937                 capture_tree = build_dentry_tree_ntfs;
938                 extra_arg = &w->ntfs_vol;
939 #else
940                 ERROR("wimlib was compiled without support for NTFS-3g, so\n"
941                       "        cannot capture a WIM image directly from a NTFS volume!");
942                 return WIMLIB_ERR_UNSUPPORTED;
943 #endif
944         } else {
945         #ifdef __WIN32__
946                 capture_tree = win32_build_dentry_tree;
947         #else
948                 capture_tree = unix_build_dentry_tree;
949         #endif
950                 extra_arg = NULL;
951         }
952
953 #ifdef __WIN32__
954         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
955                 ERROR("Capturing UNIX-specific data is not supported on Windows");
956                 return WIMLIB_ERR_INVALID_PARAM;
957         }
958         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
959                 ERROR("Dereferencing symbolic links is not supported on Windows");
960                 return WIMLIB_ERR_INVALID_PARAM;
961         }
962 #endif
963
964         if (!name || !*name) {
965                 ERROR("Must specify a non-empty string for the image name");
966                 return WIMLIB_ERR_INVALID_PARAM;
967         }
968
969         if (w->hdr.total_parts != 1) {
970                 ERROR("Cannot add an image to a split WIM");
971                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
972         }
973
974         if (wimlib_image_name_in_use(w, name)) {
975                 ERROR("There is already an image named \"%"TS"\" in the WIM!",
976                       name);
977                 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
978         }
979
980         if (!config_str) {
981                 DEBUG("Using default capture configuration");
982                 config_str = default_config;
983                 config_len = tstrlen(default_config);
984         }
985         ret = init_capture_config(&config, config_str, config_len);
986         if (ret)
987                 goto out;
988
989         DEBUG("Allocating security data");
990         sd = CALLOC(1, sizeof(struct wim_security_data));
991         if (!sd) {
992                 ret = WIMLIB_ERR_NOMEM;
993                 goto out_destroy_capture_config;
994         }
995         sd->total_length = 8;
996         sd->refcnt = 1;
997
998         sd_set.sd = sd;
999         sd_set.rb_root.rb_node = NULL;
1000
1001         DEBUG("Using %zu capture sources", num_sources);
1002         canonicalize_targets(sources, num_sources);
1003         sort_sources(sources, num_sources);
1004         ret = check_sorted_sources(sources, num_sources, add_image_flags);
1005         if (ret) {
1006                 ret = WIMLIB_ERR_INVALID_PARAM;
1007                 goto out_free_security_data;
1008         }
1009
1010         DEBUG("Building dentry tree.");
1011         root_dentry = NULL;
1012
1013         for (size_t i = 0; i < num_sources; i++) {
1014                 int flags;
1015                 union wimlib_progress_info progress;
1016
1017                 DEBUG("Building dentry tree for source %zu of %zu "
1018                       "(\"%"TS"\" => \"%"TS"\")", i + 1, num_sources,
1019                       sources[i].fs_source_path,
1020                       sources[i].wim_target_path);
1021                 if (progress_func) {
1022                         memset(&progress, 0, sizeof(progress));
1023                         progress.scan.source = sources[i].fs_source_path;
1024                         progress.scan.wim_target_path = sources[i].wim_target_path;
1025                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_BEGIN, &progress);
1026                 }
1027                 ret = capture_config_set_prefix(&config,
1028                                                 sources[i].fs_source_path);
1029                 if (ret)
1030                         goto out_free_dentry_tree;
1031                 flags = add_image_flags | WIMLIB_ADD_IMAGE_FLAG_SOURCE;
1032                 if (!*sources[i].wim_target_path)
1033                         flags |= WIMLIB_ADD_IMAGE_FLAG_ROOT;
1034                 ret = (*capture_tree)(&branch,
1035                                       sources[i].fs_source_path,
1036                                       w->lookup_table,
1037                                       &sd_set,
1038                                       &config,
1039                                       flags,
1040                                       progress_func, extra_arg);
1041                 if (ret) {
1042                         ERROR("Failed to build dentry tree for `%"TS"'",
1043                               sources[i].fs_source_path);
1044                         goto out_free_dentry_tree;
1045                 }
1046                 if (branch) {
1047                         /* Use the target name, not the source name, for
1048                          * the root of each branch from a capture
1049                          * source.  (This will also set the root dentry
1050                          * of the entire image to be unnamed.) */
1051                         ret = set_dentry_name(branch,
1052                                               path_basename(sources[i].wim_target_path));
1053                         if (ret)
1054                                 goto out_free_branch;
1055
1056                         ret = attach_branch(&root_dentry, branch,
1057                                             sources[i].wim_target_path);
1058                         if (ret)
1059                                 goto out_free_branch;
1060                 }
1061                 if (progress_func)
1062                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_END, &progress);
1063         }
1064
1065         if (root_dentry == NULL) {
1066                 ret = new_filler_directory(T(""), &root_dentry);
1067                 if (ret)
1068                         goto out_free_dentry_tree;
1069         }
1070
1071         DEBUG("Calculating full paths of dentries.");
1072         ret = for_dentry_in_tree(root_dentry, calculate_dentry_full_path, NULL);
1073         if (ret)
1074                 goto out_free_dentry_tree;
1075
1076         ret = add_new_dentry_tree(w, root_dentry, sd);
1077         if (ret)
1078                 goto out_free_dentry_tree;
1079
1080         imd = &w->image_metadata[w->hdr.image_count - 1];
1081
1082         ret = dentry_tree_fix_inodes(root_dentry, &imd->inode_list);
1083         if (ret)
1084                 goto out_destroy_imd;
1085
1086         DEBUG("Assigning hard link group IDs");
1087         assign_inode_numbers(&imd->inode_list);
1088
1089         ret = xml_add_image(w, name);
1090         if (ret)
1091                 goto out_destroy_imd;
1092
1093         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_BOOT)
1094                 wimlib_set_boot_idx(w, w->hdr.image_count);
1095         ret = 0;
1096         goto out_destroy_sd_set;
1097 out_destroy_imd:
1098         destroy_image_metadata(&w->image_metadata[w->hdr.image_count - 1],
1099                                w->lookup_table);
1100         w->hdr.image_count--;
1101         goto out_destroy_sd_set;
1102 out_free_branch:
1103         free_dentry_tree(branch, w->lookup_table);
1104 out_free_dentry_tree:
1105         free_dentry_tree(root_dentry, w->lookup_table);
1106 out_free_security_data:
1107         free_security_data(sd);
1108 out_destroy_sd_set:
1109         destroy_sd_set(&sd_set);
1110 out_destroy_capture_config:
1111         destroy_capture_config(&config);
1112 out:
1113         return ret;
1114 }
1115
1116 WIMLIBAPI int
1117 wimlib_add_image(WIMStruct *w,
1118                  const tchar *source,
1119                  const tchar *name,
1120                  const tchar *config_str,
1121                  size_t config_len,
1122                  int add_image_flags,
1123                  wimlib_progress_func_t progress_func)
1124 {
1125         if (!source || !*source)
1126                 return WIMLIB_ERR_INVALID_PARAM;
1127
1128         tchar *fs_source_path = TSTRDUP(source);
1129         int ret;
1130         struct wimlib_capture_source capture_src = {
1131                 .fs_source_path = fs_source_path,
1132                 .wim_target_path = NULL,
1133                 .reserved = 0,
1134         };
1135         ret = wimlib_add_image_multisource(w, &capture_src, 1, name,
1136                                            config_str, config_len,
1137                                            add_image_flags, progress_func);
1138         FREE(fs_source_path);
1139         return ret;
1140 }