Encodings update (IN PROGRESS)
[wimlib] / src / add_image.c
1 /*
2  * add_image.c
3  */
4
5 /*
6  * Copyright (C) 2012, 2013 Eric Biggers
7  *
8  * This file is part of wimlib, a library for working with WIM files.
9  *
10  * wimlib is free software; you can redistribute it and/or modify it under the
11  * terms of the GNU General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option)
13  * any later version.
14  *
15  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17  * A PARTICULAR PURPOSE. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with wimlib; if not, see http://www.gnu.org/licenses/.
22  */
23
24 #include "config.h"
25
26 #ifdef __WIN32__
27 #  include "win32.h"
28 #else
29 #  include <dirent.h>
30 #  include <sys/stat.h>
31 #  include <fnmatch.h>
32 #  include "timestamp.h"
33 #endif
34
35 #include "wimlib_internal.h"
36 #include "dentry.h"
37 #include "lookup_table.h"
38 #include "xml.h"
39 #include "security.h"
40
41 #include <ctype.h>
42 #include <errno.h>
43 #include <stdlib.h>
44
45 #if TCHAR_IS_UTF16LE
46 #  include <wchar.h>
47 #else
48 #  include <string.h>
49 #endif
50
51 #include <unistd.h>
52
53 #ifdef HAVE_ALLOCA_H
54 #  include <alloca.h>
55 #endif
56
57 /*
58  * Adds the dentry tree and security data for a new image to the image metadata
59  * array of the WIMStruct.
60  */
61 int
62 add_new_dentry_tree(WIMStruct *w, struct wim_dentry *root_dentry,
63                     struct wim_security_data *sd)
64 {
65         struct wim_lookup_table_entry *metadata_lte;
66         struct wim_image_metadata *imd;
67         struct wim_image_metadata *new_imd;
68
69         wimlib_assert(root_dentry != NULL);
70
71         DEBUG("Reallocating image metadata array for image_count = %u",
72               w->hdr.image_count + 1);
73         imd = CALLOC((w->hdr.image_count + 1), sizeof(struct wim_image_metadata));
74
75         if (!imd) {
76                 ERROR("Failed to allocate memory for new image metadata array");
77                 goto err;
78         }
79
80         memcpy(imd, w->image_metadata,
81                w->hdr.image_count * sizeof(struct wim_image_metadata));
82
83         metadata_lte = new_lookup_table_entry();
84         if (!metadata_lte)
85                 goto err_free_imd;
86
87         metadata_lte->resource_entry.flags = WIM_RESHDR_FLAG_METADATA;
88         random_hash(metadata_lte->hash);
89         lookup_table_insert(w->lookup_table, metadata_lte);
90
91         new_imd = &imd[w->hdr.image_count];
92
93         new_imd->root_dentry    = root_dentry;
94         new_imd->metadata_lte   = metadata_lte;
95         new_imd->security_data  = sd;
96         new_imd->modified       = 1;
97
98         FREE(w->image_metadata);
99         w->image_metadata = imd;
100         w->hdr.image_count++;
101         return 0;
102 err_free_imd:
103         FREE(imd);
104 err:
105         return WIMLIB_ERR_NOMEM;
106
107 }
108
109 #ifndef __WIN32__
110 /*
111  * unix_build_dentry_tree():
112  *      Recursively builds a tree of WIM dentries from an on-disk directory
113  *      tree (UNIX version; no NTFS-specific data is captured).
114  *
115  * @root_ret:   Place to return a pointer to the root of the dentry tree.  Only
116  *              modified if successful.  Set to NULL if the file or directory was
117  *              excluded from capture.
118  *
119  * @root_disk_path:  The path to the root of the directory tree on disk.
120  *
121  * @lookup_table: The lookup table for the WIM file.  For each file added to the
122  *              dentry tree being built, an entry is added to the lookup table,
123  *              unless an identical stream is already in the lookup table.
124  *              These lookup table entries that are added point to the path of
125  *              the file on disk.
126  *
127  * @sd_set:     Ignored.  (Security data only captured in NTFS mode.)
128  *
129  * @capture_config:
130  *              Configuration for files to be excluded from capture.
131  *
132  * @add_flags:  Bitwise or of WIMLIB_ADD_IMAGE_FLAG_*
133  *
134  * @extra_arg:  Ignored
135  *
136  * @return:     0 on success, nonzero on failure.  It is a failure if any of
137  *              the files cannot be `stat'ed, or if any of the needed
138  *              directories cannot be opened or read.  Failure to add the files
139  *              to the WIM may still occur later when trying to actually read
140  *              the on-disk files during a call to wimlib_write() or
141  *              wimlib_overwrite().
142  */
143 static int
144 unix_build_dentry_tree(struct wim_dentry **root_ret,
145                        const char *root_disk_path,
146                        struct wim_lookup_table *lookup_table,
147                        struct sd_set *sd,
148                        const struct capture_config *config,
149                        int add_image_flags,
150                        wimlib_progress_func_t progress_func,
151                        void *extra_arg)
152 {
153         struct wim_dentry *root = NULL;
154         int ret = 0;
155         struct wim_inode *inode;
156
157         if (exclude_path(root_disk_path, config, true)) {
158                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) {
159                         ERROR("Cannot exclude the root directory from capture");
160                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
161                         goto out;
162                 }
163                 if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
164                     && progress_func)
165                 {
166                         union wimlib_progress_info info;
167                         info.scan.cur_path = root_disk_path;
168                         info.scan.excluded = true;
169                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
170                 }
171                 goto out;
172         }
173
174         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
175             && progress_func)
176         {
177                 union wimlib_progress_info info;
178                 info.scan.cur_path = root_disk_path;
179                 info.scan.excluded = false;
180                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
181         }
182
183         /* UNIX version of capturing a directory tree */
184         struct stat root_stbuf;
185         int (*stat_fn)(const char *restrict, struct stat *restrict);
186         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE)
187                 stat_fn = stat;
188         else
189                 stat_fn = lstat;
190
191         ret = (*stat_fn)(root_disk_path, &root_stbuf);
192         if (ret != 0) {
193                 ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
194                 goto out;
195         }
196
197         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) &&
198               !S_ISDIR(root_stbuf.st_mode))
199         {
200                 /* Do a dereference-stat in case the root is a symbolic link.
201                  * This case is allowed, provided that the symbolic link points
202                  * to a directory. */
203                 ret = stat(root_disk_path, &root_stbuf);
204                 if (ret != 0) {
205                         ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
206                         ret = WIMLIB_ERR_STAT;
207                         goto out;
208                 }
209                 if (!S_ISDIR(root_stbuf.st_mode)) {
210                         ERROR("`%s' is not a directory", root_disk_path);
211                         ret = WIMLIB_ERR_NOTDIR;
212                         goto out;
213                 }
214         }
215         if (!S_ISREG(root_stbuf.st_mode) && !S_ISDIR(root_stbuf.st_mode)
216             && !S_ISLNK(root_stbuf.st_mode)) {
217                 ERROR("`%s' is not a regular file, directory, or symbolic link.",
218                       root_disk_path);
219                 ret = WIMLIB_ERR_SPECIAL_FILE;
220                 goto out;
221         }
222
223         ret = new_dentry_with_timeless_inode(path_basename(root_disk_path),
224                                              &root);
225         if (ret)
226                 goto out;
227
228         inode = root->d_inode;
229
230 #ifdef HAVE_STAT_NANOSECOND_PRECISION
231         inode->i_creation_time = timespec_to_wim_timestamp(root_stbuf.st_mtim);
232         inode->i_last_write_time = timespec_to_wim_timestamp(root_stbuf.st_mtim);
233         inode->i_last_access_time = timespec_to_wim_timestamp(root_stbuf.st_atim);
234 #else
235         inode->i_creation_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
236         inode->i_last_write_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
237         inode->i_last_access_time = unix_timestamp_to_wim(root_stbuf.st_atime);
238 #endif
239         /* Leave the inode number at 0 for directories. */
240         if (!S_ISDIR(root_stbuf.st_mode)) {
241                 if (sizeof(ino_t) >= 8)
242                         inode->i_ino = (u64)root_stbuf.st_ino;
243                 else
244                         inode->i_ino = (u64)root_stbuf.st_ino |
245                                            ((u64)root_stbuf.st_dev <<
246                                                 ((sizeof(ino_t) * 8) & 63));
247         }
248         inode->i_resolved = 1;
249         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
250                 ret = inode_set_unix_data(inode, root_stbuf.st_uid,
251                                           root_stbuf.st_gid,
252                                           root_stbuf.st_mode,
253                                           lookup_table,
254                                           UNIX_DATA_ALL | UNIX_DATA_CREATE);
255                 if (ret)
256                         goto out;
257         }
258         add_image_flags &= ~(WIMLIB_ADD_IMAGE_FLAG_ROOT | WIMLIB_ADD_IMAGE_FLAG_SOURCE);
259         if (S_ISREG(root_stbuf.st_mode)) { /* Archiving a regular file */
260
261                 struct wim_lookup_table_entry *lte;
262                 u8 hash[SHA1_HASH_SIZE];
263
264                 inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
265
266                 /* Empty files do not have to have a lookup table entry. */
267                 if (root_stbuf.st_size == 0)
268                         goto out;
269
270                 /* For each regular file, we must check to see if the file is in
271                  * the lookup table already; if it is, we increment its refcnt;
272                  * otherwise, we create a new lookup table entry and insert it.
273                  * */
274
275                 ret = sha1sum(root_disk_path, hash);
276                 if (ret != 0)
277                         goto out;
278
279                 lte = __lookup_resource(lookup_table, hash);
280                 if (lte) {
281                         lte->refcnt++;
282                         DEBUG("Add lte reference %u for `%s'", lte->refcnt,
283                               root_disk_path);
284                 } else {
285                         char *file_on_disk = STRDUP(root_disk_path);
286                         if (!file_on_disk) {
287                                 ERROR("Failed to allocate memory for file path");
288                                 ret = WIMLIB_ERR_NOMEM;
289                                 goto out;
290                         }
291                         lte = new_lookup_table_entry();
292                         if (!lte) {
293                                 FREE(file_on_disk);
294                                 ret = WIMLIB_ERR_NOMEM;
295                                 goto out;
296                         }
297                         lte->file_on_disk = file_on_disk;
298                         lte->resource_location = RESOURCE_IN_FILE_ON_DISK;
299                         lte->resource_entry.original_size = root_stbuf.st_size;
300                         lte->resource_entry.size = root_stbuf.st_size;
301                         copy_hash(lte->hash, hash);
302                         lookup_table_insert(lookup_table, lte);
303                 }
304                 root->d_inode->i_lte = lte;
305         } else if (S_ISDIR(root_stbuf.st_mode)) { /* Archiving a directory */
306
307                 inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
308
309                 DIR *dir;
310                 struct dirent entry, *result;
311                 struct wim_dentry *child;
312
313                 dir = opendir(root_disk_path);
314                 if (!dir) {
315                         ERROR_WITH_ERRNO("Failed to open the directory `%s'",
316                                          root_disk_path);
317                         ret = WIMLIB_ERR_OPEN;
318                         goto out;
319                 }
320
321                 /* Buffer for names of files in directory. */
322                 size_t len = strlen(root_disk_path);
323                 char name[len + 1 + FILENAME_MAX + 1];
324                 memcpy(name, root_disk_path, len);
325                 name[len] = '/';
326
327                 /* Create a dentry for each entry in the directory on disk, and recurse
328                  * to any subdirectories. */
329                 while (1) {
330                         errno = 0;
331                         ret = readdir_r(dir, &entry, &result);
332                         if (ret != 0) {
333                                 ret = WIMLIB_ERR_READ;
334                                 ERROR_WITH_ERRNO("Error reading the "
335                                                  "directory `%s'",
336                                                  root_disk_path);
337                                 break;
338                         }
339                         if (result == NULL)
340                                 break;
341                         if (result->d_name[0] == '.' && (result->d_name[1] == '\0'
342                               || (result->d_name[1] == '.' && result->d_name[2] == '\0')))
343                                         continue;
344                         strcpy(name + len + 1, result->d_name);
345                         ret = unix_build_dentry_tree(&child, name,
346                                                      lookup_table,
347                                                      NULL, config,
348                                                      add_image_flags,
349                                                      progress_func, NULL);
350                         if (ret != 0)
351                                 break;
352                         if (child)
353                                 dentry_add_child(root, child);
354                 }
355                 closedir(dir);
356         } else { /* Archiving a symbolic link */
357                 inode->i_attributes = FILE_ATTRIBUTE_REPARSE_POINT;
358                 inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK;
359
360                 /* The idea here is to call readlink() to get the UNIX target of
361                  * the symbolic link, then turn the target into a reparse point
362                  * data buffer that contains a relative or absolute symbolic
363                  * link (NOT a junction point or *full* path symbolic link with
364                  * drive letter).
365                  */
366
367                 char deref_name_buf[4096];
368                 ssize_t deref_name_len;
369
370                 deref_name_len = readlink(root_disk_path, deref_name_buf,
371                                           sizeof(deref_name_buf) - 1);
372                 if (deref_name_len >= 0) {
373                         deref_name_buf[deref_name_len] = '\0';
374                         DEBUG("Read symlink `%s'", deref_name_buf);
375                         ret = inode_set_symlink(root->d_inode, deref_name_buf,
376                                                 lookup_table, NULL);
377                         if (ret == 0) {
378                                 /*
379                                  * Unfortunately, Windows seems to have the
380                                  * concept of "file" symbolic links as being
381                                  * different from "directory" symbolic links...
382                                  * so FILE_ATTRIBUTE_DIRECTORY needs to be set
383                                  * on the symbolic link if the *target* of the
384                                  * symbolic link is a directory.
385                                  */
386                                 struct stat stbuf;
387                                 if (stat(root_disk_path, &stbuf) == 0 &&
388                                     S_ISDIR(stbuf.st_mode))
389                                 {
390                                         inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
391                                 }
392                         }
393                 } else {
394                         ERROR_WITH_ERRNO("Failed to read target of "
395                                          "symbolic link `%s'", root_disk_path);
396                         ret = WIMLIB_ERR_READLINK;
397                 }
398         }
399 out:
400         if (ret == 0)
401                 *root_ret = root;
402         else
403                 free_dentry_tree(root, lookup_table);
404         return ret;
405 }
406 #endif /* !__WIN32__ */
407
/* Identifies which section of the capture configuration a pattern line
 * belongs to while the configuration is being parsed. */
enum pattern_type {
        NONE                       = 0, /* No section header seen yet */
        EXCLUSION_LIST             = 1, /* [ExclusionList] */
        EXCLUSION_EXCEPTION        = 2, /* [ExclusionException] */
        COMPRESSION_EXCLUSION_LIST = 3, /* [CompressionExclusionList] */
        ALIGNMENT_LIST             = 4, /* [AlignmentList] */
};
415
416 #define COMPAT_DEFAULT_CONFIG
417
418 /* Default capture configuration file when none is specified. */
419 static const tchar *default_config =
420 #ifdef COMPAT_DEFAULT_CONFIG /* XXX: This policy is being moved to library
421                                 users.  The next ABI-incompatible library
422                                 version will default to the empty string here. */
423 T(
424 "[ExclusionList]\n"
425 "\\$ntfs.log\n"
426 "\\hiberfil.sys\n"
427 "\\pagefile.sys\n"
428 "\\System Volume Information\n"
429 "\\RECYCLER\n"
430 "\\Windows\\CSC\n"
431 "\n"
432 "[CompressionExclusionList]\n"
433 "*.mp3\n"
434 "*.zip\n"
435 "*.cab\n"
436 "\\WINDOWS\\inf\\*.pnf\n"
437 );
438 #else
439 T("");
440 #endif
441
442 static void
443 destroy_pattern_list(struct pattern_list *list)
444 {
445         FREE(list->pats);
446 }
447
448 static void
449 destroy_capture_config(struct capture_config *config)
450 {
451         destroy_pattern_list(&config->exclusion_list);
452         destroy_pattern_list(&config->exclusion_exception);
453         destroy_pattern_list(&config->compression_exclusion_list);
454         destroy_pattern_list(&config->alignment_list);
455         FREE(config->config_str);
456         FREE(config->prefix);
457         memset(config, 0, sizeof(*config));
458 }
459
460 static int
461 pattern_list_add_pattern(struct pattern_list *list, const tchar *pattern)
462 {
463         const tchar **pats;
464         if (list->num_pats >= list->num_allocated_pats) {
465                 pats = REALLOC(list->pats,
466                                sizeof(list->pats[0]) * (list->num_allocated_pats + 8));
467                 if (!pats)
468                         return WIMLIB_ERR_NOMEM;
469                 list->num_allocated_pats += 8;
470                 list->pats = pats;
471         }
472         list->pats[list->num_pats++] = pattern;
473         return 0;
474 }
475
476 /* Parses the contents of the image capture configuration file and fills in a
477  * `struct capture_config'. */
478 static int
479 init_capture_config(struct capture_config *config,
480                     const tchar *_config_str,
481                     size_t config_num_tchars)
482 {
483         tchar *config_str;
484         tchar *p;
485         tchar *eol;
486         tchar *next_p;
487         size_t num_tchars_remaining;
488         enum pattern_type type = NONE;
489         int ret;
490         unsigned long line_no = 0;
491
492         DEBUG("config_num_tchars = %zu", config_num_tchars);
493         num_tchars_remaining = config_num_tchars;
494         memset(config, 0, sizeof(*config));
495         config_str = TMALLOC(config_num_tchars);
496         if (!config_str) {
497                 ERROR("Could not duplicate capture config string");
498                 return WIMLIB_ERR_NOMEM;
499         }
500
501         tmemcpy(config_str, _config_str, config_num_tchars);
502         next_p = config_str;
503         config->config_str = config_str;
504         while (num_tchars_remaining != 0) {
505                 line_no++;
506                 p = next_p;
507                 eol = tmemchr(p, T('\n'), num_tchars_remaining);
508                 if (!eol) {
509                         ERROR("Expected end-of-line in capture config file on "
510                               "line %lu", line_no);
511                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
512                         goto out_destroy;
513                 }
514
515                 next_p = eol + 1;
516                 num_tchars_remaining -= (next_p - p);
517                 if (eol == p)
518                         continue;
519
520                 if (*(eol - 1) == T('\r'))
521                         eol--;
522                 *eol = T('\0');
523
524                 /* Translate backslash to forward slash */
525                 for (tchar *pp = p; pp != eol; pp++)
526                         if (*pp == T('\\'))
527                                 *pp = T('/');
528
529                 /* Remove drive letter 
530                  * XXX maybe keep drive letter on Windows */
531                 if (eol - p > 2 && istalpha(*p) && *(p + 1) == T(':'))
532                         p += 2;
533
534                 ret = 0;
535                 if (tstrcmp(p, T("[ExclusionList]")) == 0)
536                         type = EXCLUSION_LIST;
537                 else if (tstrcmp(p, T("[ExclusionException]")) == 0)
538                         type = EXCLUSION_EXCEPTION;
539                 else if (tstrcmp(p, T("[CompressionExclusionList]")) == 0)
540                         type = COMPRESSION_EXCLUSION_LIST;
541                 else if (tstrcmp(p, T("[AlignmentList]")) == 0)
542                         type = ALIGNMENT_LIST;
543                 else if (p[0] == T('[') && tstrrchr(p, T(']'))) {
544                         ERROR("Unknown capture configuration section \"%"TS"\"", p);
545                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
546                 } else switch (type) {
547                 case EXCLUSION_LIST:
548                         DEBUG("Adding pattern \"%"TS"\" to exclusion list", p);
549                         ret = pattern_list_add_pattern(&config->exclusion_list, p);
550                         break;
551                 case EXCLUSION_EXCEPTION:
552                         DEBUG("Adding pattern \"%"TS"\" to exclusion exception list", p);
553                         ret = pattern_list_add_pattern(&config->exclusion_exception, p);
554                         break;
555                 case COMPRESSION_EXCLUSION_LIST:
556                         DEBUG("Adding pattern \"%"TS"\" to compression exclusion list", p);
557                         ret = pattern_list_add_pattern(&config->compression_exclusion_list, p);
558                         break;
559                 case ALIGNMENT_LIST:
560                         DEBUG("Adding pattern \"%"TS"\" to alignment list", p);
561                         ret = pattern_list_add_pattern(&config->alignment_list, p);
562                         break;
563                 default:
564                         ERROR("Line %lu of capture configuration is not "
565                               "in a block (such as [ExclusionList])",
566                               line_no);
567                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
568                         break;
569                 }
570                 if (ret != 0)
571                         goto out_destroy;
572         }
573         return 0;
574 out_destroy:
575         destroy_capture_config(config);
576         return ret;
577 }
578
579 static int capture_config_set_prefix(struct capture_config *config,
580                                      const tchar *_prefix)
581 {
582         tchar *prefix = TSTRDUP(_prefix);
583
584         if (!prefix)
585                 return WIMLIB_ERR_NOMEM;
586         FREE(config->prefix);
587         config->prefix = prefix;
588         config->prefix_num_tchars = tstrlen(prefix);
589         return 0;
590 }
591
592 static bool match_pattern(const tchar *path,
593                           const tchar *path_basename,
594                           const struct pattern_list *list)
595 {
596         for (size_t i = 0; i < list->num_pats; i++) {
597                 const tchar *pat = list->pats[i];
598                 const tchar *string;
599                 if (pat[0] == '/')
600                         /* Absolute path from root of capture */
601                         string = path;
602                 else {
603                         if (tstrchr(pat, T('/')))
604                                 /* Relative path from root of capture */
605                                 string = path + 1;
606                         else
607                                 /* A file name pattern */
608                                 string = path_basename;
609                 }
610
611                 /* Warning: on Windows native builds, fnmatch() calls the
612                  * replacement function in win32.c. */
613                 if (fnmatch(pat, string, FNM_PATHNAME
614                                 #ifdef FNM_CASEFOLD
615                                         | FNM_CASEFOLD
616                                 #endif
617                             ) == 0)
618                 {
619                         DEBUG("\"%"TS"\" matches the pattern \"%"TS"\"",
620                               string, pat);
621                         return true;
622                 }
623         }
624         return false;
625 }
626
627 /* Return true if the image capture configuration file indicates we should
628  * exclude the filename @path from capture.
629  *
630  * If @exclude_prefix is %true, the part of the path up and including the name
631  * of the directory being captured is not included in the path for matching
632  * purposes.  This allows, for example, a pattern like /hiberfil.sys to match a
633  * file /mnt/windows7/hiberfil.sys if we are capturing the /mnt/windows7
634  * directory.
635  */
636 bool
637 exclude_path(const tchar *path, const struct capture_config *config,
638              bool exclude_prefix)
639 {
640         const tchar *basename = path_basename(path);
641         if (exclude_prefix) {
642                 wimlib_assert(tstrlen(path) >= config->prefix_num_tchars);
643                 if (tmemcmp(config->prefix, path, config->prefix_num_tchars) == 0
644                      && path[config->prefix_num_tchars] == T('/'))
645                         path += config->prefix_num_tchars;
646         }
647         return match_pattern(path, basename, &config->exclusion_list) &&
648                 !match_pattern(path, basename, &config->exclusion_exception);
649
650 }
651
652 /* Strip leading and trailing forward slashes from a string.  Modifies it in
653  * place and returns the stripped string. */
654 static const tchar *
655 canonicalize_target_path(tchar *target_path)
656 {
657         tchar *p;
658         if (target_path == NULL)
659                 return T("");
660         for (;;) {
661                 if (*target_path == T('\0'))
662                         return target_path;
663                 else if (*target_path == T('/'))
664                         target_path++;
665                 else
666                         break;
667         }
668
669         p = target_path + tstrlen(target_path) - 1;
670         while (*p == T('/'))
671                 *p-- = T('\0');
672         return target_path;
673 }
674
675 static void
676 zap_backslashes(tchar *s)
677 {
678         while (*s) {
679                 if (*s == T('\\'))
680                         *s = T('/');
681                 s++;
682         }
683 }
684
685 /* Strip leading and trailing slashes from the target paths */
686 static void
687 canonicalize_targets(struct wimlib_capture_source *sources, size_t num_sources)
688 {
689         while (num_sources--) {
690                 DEBUG("Canonicalizing { source: \"%"TS"\", target=\"%"TS"\"}",
691                       sources->fs_source_path,
692                       sources->wim_target_path);
693
694                 /* The Windows API can handle forward slashes.  Just get rid of
695                  * backslashes to avoid confusing other parts of the library
696                  * code. */
697                 zap_backslashes(sources->fs_source_path);
698                 if (sources->wim_target_path)
699                         zap_backslashes(sources->wim_target_path);
700
701                 sources->wim_target_path =
702                         (tchar*)canonicalize_target_path(sources->wim_target_path);
703                 DEBUG("Canonical target: \"%"TS"\"", sources->wim_target_path);
704                 sources++;
705         }
706 }
707
708 static int
709 capture_source_cmp(const void *p1, const void *p2)
710 {
711         const struct wimlib_capture_source *s1 = p1, *s2 = p2;
712         return tstrcmp(s1->wim_target_path, s2->wim_target_path);
713 }
714
715 /* Sorts the capture sources lexicographically by target path.  This occurs
716  * after leading and trailing forward slashes are stripped.
717  *
718  * One purpose of this is to make sure that target paths that are inside other
719  * target paths are added after the containing target paths. */
720 static void
721 sort_sources(struct wimlib_capture_source *sources, size_t num_sources)
722 {
723         qsort(sources, num_sources, sizeof(sources[0]), capture_source_cmp);
724 }
725
726 static int
727 check_sorted_sources(struct wimlib_capture_source *sources, size_t num_sources,
728                      int add_image_flags)
729 {
730         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
731                 if (num_sources != 1) {
732                         ERROR("Must specify exactly 1 capture source "
733                               "(the NTFS volume) in NTFS mode!");
734                         return WIMLIB_ERR_INVALID_PARAM;
735                 }
736                 if (sources[0].wim_target_path[0] != '\0') {
737                         ERROR("In NTFS capture mode the target path inside "
738                               "the image must be the root directory!");
739                         return WIMLIB_ERR_INVALID_PARAM;
740                 }
741         } else if (num_sources != 0) {
742                 /* This code is disabled because the current code
743                  * unconditionally attempts to do overlays.  So, duplicate
744                  * target paths are OK. */
745         #if 0
746                 if (num_sources > 1 && sources[0].wim_target_path[0] == '\0') {
747                         ERROR("Cannot specify root target when using multiple "
748                               "capture sources!");
749                         return WIMLIB_ERR_INVALID_PARAM;
750                 }
751                 for (size_t i = 0; i < num_sources - 1; i++) {
752                         size_t len = strlen(sources[i].wim_target_path);
753                         size_t j = i + 1;
754                         const char *target1 = sources[i].wim_target_path;
755                         do {
756                                 const char *target2 = sources[j].wim_target_path;
757                                 DEBUG("target1=%s, target2=%s",
758                                       target1,target2);
759                                 if (strncmp(target1, target2, len) ||
760                                     target2[len] > '/')
761                                         break;
762                                 if (target2[len] == '/') {
763                                         ERROR("Invalid target `%s': is a prefix of `%s'",
764                                               target1, target2);
765                                         return WIMLIB_ERR_INVALID_PARAM;
766                                 }
767                                 if (target2[len] == '\0') {
768                                         ERROR("Invalid target `%s': is a duplicate of `%s'",
769                                               target1, target2);
770                                         return WIMLIB_ERR_INVALID_PARAM;
771                                 }
772                         } while (++j != num_sources);
773                 }
774         #endif
775         }
776         return 0;
777
778 }
779
780 /* Creates a new directory to place in the WIM image.  This is to create parent
781  * directories that are not part of any target as needed.  */
782 static int
783 new_filler_directory(const tchar *name, struct wim_dentry **dentry_ret)
784 {
785         int ret;
786         struct wim_dentry *dentry;
787
788         DEBUG("Creating filler directory \"%"TS"\"", name);
789         ret = new_dentry_with_inode(name, &dentry);
790         if (ret == 0) {
791                 /* Leave the inode number as 0 for now.  The final inode number
792                  * will be assigned later by assign_inode_numbers(). */
793                 dentry->d_inode->i_resolved = 1;
794                 dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
795                 *dentry_ret = dentry;
796         }
797         return ret;
798 }
799
800 /* Transfers the children of @branch to @target.  It is an error if @target is
801  * not a directory or if both @branch and @target contain a child dentry with
802  * the same name. */
803 static int
804 do_overlay(struct wim_dentry *target, struct wim_dentry *branch)
805 {
806         struct rb_root *rb_root;
807
808         DEBUG("Doing overlay \"%"WS"\" => \"%"WS"\"",
809               branch->file_name, target->file_name);
810
811         if (!dentry_is_directory(target)) {
812                 ERROR("Cannot overlay directory \"%"WS"\" "
813                       "over non-directory", branch->file_name);
814                 return WIMLIB_ERR_INVALID_OVERLAY;
815         }
816
817         rb_root = &branch->d_inode->i_children;
818         while (rb_root->rb_node) { /* While @branch has children... */
819                 struct wim_dentry *child = rbnode_dentry(rb_root->rb_node);
820                 /* Move @child to the directory @target */
821                 unlink_dentry(child);
822                 if (!dentry_add_child(target, child)) {
823                         /* Revert the change to avoid leaking the directory tree
824                          * rooted at @child */
825                         dentry_add_child(branch, child);
826                         ERROR("Overlay error: file \"%"WS"\" already exists "
827                               "as a child of \"%"WS"\"",
828                               child->file_name, target->file_name);
829                         return WIMLIB_ERR_INVALID_OVERLAY;
830                 }
831         }
832         free_dentry(branch);
833         return 0;
834
835 }
836
837 /* Attach or overlay a branch onto the WIM image.
838  *
839  * @root_p:
840  *      Pointer to the root of the WIM image, or pointer to NULL if it has not
841  *      been created yet.
842  * @branch
843  *      Branch to add.
844  * @target_path:
845  *      Path in the WIM image to add the branch, with leading and trailing
846  *      slashes stripped.
847  */
848 static int
849 attach_branch(struct wim_dentry **root_p, struct wim_dentry *branch,
850               tchar *target_path)
851 {
852         tchar *slash;
853         struct wim_dentry *dentry, *parent, *target;
854         int ret;
855
856         DEBUG("Attaching branch \"%"WS"\" => \"%"TS"\"",
857               branch->file_name, target_path);
858
859         if (*target_path == T('\0')) {
860                 /* Target: root directory */
861                 if (*root_p) {
862                         /* Overlay on existing root */
863                         return do_overlay(*root_p, branch);
864                 } else  {
865                         /* Set as root */
866                         *root_p = branch;
867                         return 0;
868                 }
869         }
870
871         /* Adding a non-root branch.  Create root if it hasn't been created
872          * already. */
873         if (!*root_p) {
874                 ret  = new_filler_directory(T(""), root_p);
875                 if (ret)
876                         return ret;
877         }
878
879         /* Walk the path to the branch, creating filler directories as needed.
880          * */
881         parent = *root_p;
882         while ((slash = tstrchr(target_path, T('/')))) {
883                 *slash = '\0';
884                 dentry = get_dentry_child_with_name(parent, target_path);
885                 if (!dentry) {
886                         ret = new_filler_directory(target_path, &dentry);
887                         if (ret)
888                                 return ret;
889                         dentry_add_child(parent, dentry);
890                 }
891                 parent = dentry;
892                 target_path = slash;
893                 /* Skip over slashes.  Note: this cannot overrun the length of
894                  * the string because the last character cannot be a slash, as
895                  * trailing slashes were tripped.  */
896                 do {
897                         ++target_path;
898                 } while (*target_path == T('/'));
899         }
900
901         /* If the target path already existed, overlay the branch onto it.
902          * Otherwise, set the branch as the target path. */
903         target = get_dentry_child_with_utf16le_name(parent, branch->file_name,
904                                                     branch->file_name_nbytes);
905         if (target) {
906                 return do_overlay(target, branch);
907         } else {
908                 dentry_add_child(parent, branch);
909                 return 0;
910         }
911 }
912
913 WIMLIBAPI int
914 wimlib_add_image_multisource(WIMStruct *w,
915                              struct wimlib_capture_source *sources,
916                              size_t num_sources,
917                              const tchar *name,
918                              const tchar *config_str,
919                              size_t config_len,
920                              int add_image_flags,
921                              wimlib_progress_func_t progress_func)
922 {
923         int (*capture_tree)(struct wim_dentry **,
924                             const tchar *,
925                             struct wim_lookup_table *,
926                             struct sd_set *,
927                             const struct capture_config *,
928                             int,
929                             wimlib_progress_func_t,
930                             void *);
931         void *extra_arg;
932         struct wim_dentry *root_dentry;
933         struct wim_dentry *branch;
934         struct wim_security_data *sd;
935         struct capture_config config;
936         struct wim_image_metadata *imd;
937         int ret;
938         struct sd_set sd_set;
939
940         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
941 #ifdef WITH_NTFS_3G
942                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
943                         ERROR("Cannot dereference files when capturing directly from NTFS");
944                         return WIMLIB_ERR_INVALID_PARAM;
945                 }
946                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
947                         ERROR("Capturing UNIX owner and mode not supported "
948                               "when capturing directly from NTFS");
949                         return WIMLIB_ERR_INVALID_PARAM;
950                 }
951                 capture_tree = build_dentry_tree_ntfs;
952                 extra_arg = &w->ntfs_vol;
953 #else
954                 ERROR("wimlib was compiled without support for NTFS-3g, so\n"
955                       "        cannot capture a WIM image directly from a NTFS volume!");
956                 return WIMLIB_ERR_UNSUPPORTED;
957 #endif
958         } else {
959         #ifdef __WIN32__
960                 capture_tree = win32_build_dentry_tree;
961         #else
962                 capture_tree = unix_build_dentry_tree;
963         #endif
964                 extra_arg = NULL;
965         }
966
967 #ifdef __WIN32__
968         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
969                 ERROR("Capturing UNIX-specific data is not supported on Windows");
970                 return WIMLIB_ERR_INVALID_PARAM;
971         }
972         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
973                 ERROR("Dereferencing symbolic links is not supported on Windows");
974                 return WIMLIB_ERR_INVALID_PARAM;
975         }
976 #endif
977
978         if (!name || !*name) {
979                 ERROR("Must specify a non-empty string for the image name");
980                 return WIMLIB_ERR_INVALID_PARAM;
981         }
982
983         if (w->hdr.total_parts != 1) {
984                 ERROR("Cannot add an image to a split WIM");
985                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
986         }
987
988         if (wimlib_image_name_in_use(w, name)) {
989                 ERROR("There is already an image named \"%"TS"\" in the WIM!",
990                       name);
991                 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
992         }
993
994         if (!config_str) {
995                 DEBUG("Using default capture configuration");
996                 config_str = default_config;
997                 config_len = tstrlen(default_config);
998         }
999         ret = init_capture_config(&config, config_str, config_len);
1000         if (ret)
1001                 goto out;
1002
1003         DEBUG("Allocating security data");
1004         sd = CALLOC(1, sizeof(struct wim_security_data));
1005         if (!sd) {
1006                 ret = WIMLIB_ERR_NOMEM;
1007                 goto out_destroy_capture_config;
1008         }
1009         sd->total_length = 8;
1010         sd->refcnt = 1;
1011
1012         sd_set.sd = sd;
1013         sd_set.rb_root.rb_node = NULL;
1014
1015         DEBUG("Using %zu capture sources", num_sources);
1016         canonicalize_targets(sources, num_sources);
1017         sort_sources(sources, num_sources);
1018         ret = check_sorted_sources(sources, num_sources, add_image_flags);
1019         if (ret) {
1020                 ret = WIMLIB_ERR_INVALID_PARAM;
1021                 goto out_free_security_data;
1022         }
1023
1024         DEBUG("Building dentry tree.");
1025         root_dentry = NULL;
1026
1027         for (size_t i = 0; i < num_sources; i++) {
1028                 int flags;
1029                 union wimlib_progress_info progress;
1030
1031                 DEBUG("Building dentry tree for source %zu of %zu "
1032                       "(\"%"TS"\" => \"%"TS"\")", i + 1, num_sources,
1033                       sources[i].fs_source_path,
1034                       sources[i].wim_target_path);
1035                 if (progress_func) {
1036                         memset(&progress, 0, sizeof(progress));
1037                         progress.scan.source = sources[i].fs_source_path;
1038                         progress.scan.wim_target_path = sources[i].wim_target_path;
1039                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_BEGIN, &progress);
1040                 }
1041                 ret = capture_config_set_prefix(&config,
1042                                                 sources[i].fs_source_path);
1043                 if (ret)
1044                         goto out_free_dentry_tree;
1045                 flags = add_image_flags | WIMLIB_ADD_IMAGE_FLAG_SOURCE;
1046                 if (!*sources[i].wim_target_path)
1047                         flags |= WIMLIB_ADD_IMAGE_FLAG_ROOT;
1048                 ret = (*capture_tree)(&branch,
1049                                       sources[i].fs_source_path,
1050                                       w->lookup_table,
1051                                       &sd_set,
1052                                       &config,
1053                                       flags,
1054                                       progress_func, extra_arg);
1055                 if (ret) {
1056                         ERROR("Failed to build dentry tree for `%"TS"'",
1057                               sources[i].fs_source_path);
1058                         goto out_free_dentry_tree;
1059                 }
1060                 if (branch) {
1061                         /* Use the target name, not the source name, for
1062                          * the root of each branch from a capture
1063                          * source.  (This will also set the root dentry
1064                          * of the entire image to be unnamed.) */
1065                         ret = set_dentry_name(branch,
1066                                               path_basename(sources[i].wim_target_path));
1067                         if (ret)
1068                                 goto out_free_branch;
1069
1070                         ret = attach_branch(&root_dentry, branch,
1071                                             sources[i].wim_target_path);
1072                         if (ret)
1073                                 goto out_free_branch;
1074                 }
1075                 if (progress_func)
1076                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_END, &progress);
1077         }
1078
1079         if (root_dentry == NULL) {
1080                 ret = new_filler_directory(T(""), &root_dentry);
1081                 if (ret)
1082                         goto out_free_dentry_tree;
1083         }
1084
1085         DEBUG("Calculating full paths of dentries.");
1086         ret = for_dentry_in_tree(root_dentry, calculate_dentry_full_path, NULL);
1087         if (ret)
1088                 goto out_free_dentry_tree;
1089
1090         ret = add_new_dentry_tree(w, root_dentry, sd);
1091         if (ret)
1092                 goto out_free_dentry_tree;
1093
1094         imd = &w->image_metadata[w->hdr.image_count - 1];
1095
1096         ret = dentry_tree_fix_inodes(root_dentry, &imd->inode_list);
1097         if (ret)
1098                 goto out_destroy_imd;
1099
1100         DEBUG("Assigning hard link group IDs");
1101         assign_inode_numbers(&imd->inode_list);
1102
1103         ret = xml_add_image(w, name);
1104         if (ret)
1105                 goto out_destroy_imd;
1106
1107         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_BOOT)
1108                 wimlib_set_boot_idx(w, w->hdr.image_count);
1109         ret = 0;
1110         goto out_destroy_sd_set;
1111 out_destroy_imd:
1112         destroy_image_metadata(&w->image_metadata[w->hdr.image_count - 1],
1113                                w->lookup_table);
1114         w->hdr.image_count--;
1115         goto out;
1116 out_free_branch:
1117         free_dentry_tree(branch, w->lookup_table);
1118 out_free_dentry_tree:
1119         free_dentry_tree(root_dentry, w->lookup_table);
1120 out_free_security_data:
1121         free_security_data(sd);
1122 out_destroy_sd_set:
1123         destroy_sd_set(&sd_set);
1124 out_destroy_capture_config:
1125         destroy_capture_config(&config);
1126 out:
1127         return ret;
1128 }
1129
1130 WIMLIBAPI int
1131 wimlib_add_image(WIMStruct *w,
1132                  const tchar *source,
1133                  const tchar *name,
1134                  const tchar *config_str,
1135                  size_t config_len,
1136                  int add_image_flags,
1137                  wimlib_progress_func_t progress_func)
1138 {
1139         if (!source || !*source)
1140                 return WIMLIB_ERR_INVALID_PARAM;
1141
1142         tchar *fs_source_path = TSTRDUP(source);
1143         int ret;
1144         struct wimlib_capture_source capture_src = {
1145                 .fs_source_path = fs_source_path,
1146                 .wim_target_path = NULL,
1147                 .reserved = 0,
1148         };
1149         ret = wimlib_add_image_multisource(w, &capture_src, 1, name,
1150                                            config_str, config_len,
1151                                            add_image_flags, progress_func);
1152         FREE(fs_source_path);
1153         return ret;
1154 }