0e55a981e6e2e8ca2a4ba88ee812417ff9d83fb7
[wimlib] / src / add_image.c
1 /*
2  * add_image.c
3  */
4
5 /*
6  * Copyright (C) 2012, 2013 Eric Biggers
7  *
8  * This file is part of wimlib, a library for working with WIM files.
9  *
10  * wimlib is free software; you can redistribute it and/or modify it under the
11  * terms of the GNU General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option)
13  * any later version.
14  *
15  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17  * A PARTICULAR PURPOSE. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with wimlib; if not, see http://www.gnu.org/licenses/.
22  */
23
24 #include "config.h"
25
26 #ifdef __WIN32__
27 #  include "win32.h"
28 #else
29 #  include <dirent.h>
30 #  include <sys/stat.h>
31 #  include <fnmatch.h>
32 #  include "timestamp.h"
33 #endif
34
35 #include "wimlib_internal.h"
36 #include "dentry.h"
37 #include "lookup_table.h"
38 #include "xml.h"
39 #include "security.h"
40
41 #include <ctype.h>
42 #include <errno.h>
43 #include <stdlib.h>
44 #include <limits.h>
45 #include <string.h>
46
47 #include <unistd.h>
48
49 #ifdef HAVE_ALLOCA_H
50 #  include <alloca.h>
51 #endif
52
53 /*
54  * Adds the dentry tree and security data for a new image to the image metadata
55  * array of the WIMStruct.
56  */
57 int
58 add_new_dentry_tree(WIMStruct *w, struct wim_dentry *root_dentry,
59                     struct wim_security_data *sd)
60 {
61         struct wim_lookup_table_entry *metadata_lte;
62         struct wim_image_metadata *imd;
63         struct wim_image_metadata *new_imd;
64
65         wimlib_assert(root_dentry != NULL);
66
67         DEBUG("Reallocating image metadata array for image_count = %u",
68               w->hdr.image_count + 1);
69         imd = CALLOC((w->hdr.image_count + 1), sizeof(struct wim_image_metadata));
70
71         if (!imd) {
72                 ERROR("Failed to allocate memory for new image metadata array");
73                 goto err;
74         }
75
76         memcpy(imd, w->image_metadata,
77                w->hdr.image_count * sizeof(struct wim_image_metadata));
78
79         metadata_lte = new_lookup_table_entry();
80         if (!metadata_lte)
81                 goto err_free_imd;
82
83         metadata_lte->resource_entry.flags = WIM_RESHDR_FLAG_METADATA;
84
85         new_imd = &imd[w->hdr.image_count];
86
87         new_imd->root_dentry    = root_dentry;
88         new_imd->metadata_lte   = metadata_lte;
89         new_imd->security_data  = sd;
90         new_imd->modified       = 1;
91
92         FREE(w->image_metadata);
93         w->image_metadata = imd;
94         w->hdr.image_count++;
95         return 0;
96 err_free_imd:
97         FREE(imd);
98 err:
99         return WIMLIB_ERR_NOMEM;
100
101 }
102
103 #ifndef __WIN32__
104
105 static int
106 unix_capture_regular_file(const char *path,
107                           uint64_t size,
108                           struct wim_inode *inode,
109                           struct wim_lookup_table *lookup_table)
110 {
111         struct wim_lookup_table_entry *lte;
112         u8 hash[SHA1_HASH_SIZE];
113         int ret;
114
115         inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
116
117         /* Empty files do not have to have a lookup table entry. */
118         if (size == 0)
119                 return 0;
120
121         /* For each regular file, we must check to see if the file is in
122          * the lookup table already; if it is, we increment its refcnt;
123          * otherwise, we create a new lookup table entry and insert it.
124          * */
125
126         ret = sha1sum(path, hash);
127         if (ret)
128                 return ret;
129
130         lte = __lookup_resource(lookup_table, hash);
131         if (lte) {
132                 lte->refcnt++;
133                 DEBUG("Add lte reference %u for `%s'", lte->refcnt,
134                       path);
135         } else {
136                 char *file_on_disk = STRDUP(path);
137                 if (!file_on_disk) {
138                         ERROR("Failed to allocate memory for file path");
139                         return WIMLIB_ERR_NOMEM;
140                 }
141                 lte = new_lookup_table_entry();
142                 if (!lte) {
143                         FREE(file_on_disk);
144                         return WIMLIB_ERR_NOMEM;
145                 }
146                 lte->file_on_disk = file_on_disk;
147                 lte->resource_location = RESOURCE_IN_FILE_ON_DISK;
148                 lte->resource_entry.original_size = size;
149                 lte->resource_entry.size = size;
150                 copy_hash(lte->hash, hash);
151                 lookup_table_insert(lookup_table, lte);
152         }
153         inode->i_lte = lte;
154         return 0;
155 }
156
157 static int
158 unix_build_dentry_tree_recursive(struct wim_dentry **root_ret,
159                                  char *path,
160                                  size_t path_len,
161                                  struct wim_lookup_table *lookup_table,
162                                  const struct capture_config *config,
163                                  int add_image_flags,
164                                  wimlib_progress_func_t progress_func);
165
166 static int
167 unix_capture_directory(struct wim_dentry *dir_dentry,
168                        char *path,
169                        size_t path_len,
170                        struct wim_lookup_table *lookup_table,
171                        const struct capture_config *config,
172                        int add_image_flags,
173                        wimlib_progress_func_t progress_func)
174 {
175
176         DIR *dir;
177         struct dirent entry, *result;
178         struct wim_dentry *child;
179         int ret;
180
181         dir_dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
182         dir = opendir(path);
183         if (!dir) {
184                 ERROR_WITH_ERRNO("Failed to open the directory `%s'",
185                                  path);
186                 return WIMLIB_ERR_OPEN;
187         }
188
189         /* Recurse on directory contents */
190         while (1) {
191                 errno = 0;
192                 ret = readdir_r(dir, &entry, &result);
193                 if (ret != 0) {
194                         ret = WIMLIB_ERR_READ;
195                         ERROR_WITH_ERRNO("Error reading the "
196                                          "directory `%s'",
197                                          path);
198                         break;
199                 }
200                 if (result == NULL)
201                         break;
202                 if (result->d_name[0] == '.' && (result->d_name[1] == '\0'
203                       || (result->d_name[1] == '.' && result->d_name[2] == '\0')))
204                                 continue;
205
206                 size_t name_len = strlen(result->d_name);
207
208                 path[path_len] = '/';
209                 memcpy(&path[path_len + 1], result->d_name, name_len + 1);
210                 ret = unix_build_dentry_tree_recursive(&child,
211                                                        path,
212                                                        path_len + 1 + name_len,
213                                                        lookup_table,
214                                                        config,
215                                                        add_image_flags,
216                                                        progress_func);
217                 if (ret)
218                         break;
219                 if (child)
220                         dentry_add_child(dir_dentry, child);
221         }
222         closedir(dir);
223         return ret;
224 }
225
226 static int
227 unix_capture_symlink(const char *path,
228                      struct wim_inode *inode,
229                      struct wim_lookup_table *lookup_table)
230 {
231         char deref_name_buf[4096];
232         ssize_t deref_name_len;
233         int ret;
234
235         inode->i_attributes = FILE_ATTRIBUTE_REPARSE_POINT;
236         inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK;
237
238         /* The idea here is to call readlink() to get the UNIX target of
239          * the symbolic link, then turn the target into a reparse point
240          * data buffer that contains a relative or absolute symbolic
241          * link (NOT a junction point or *full* path symbolic link with
242          * drive letter).
243          */
244         deref_name_len = readlink(path, deref_name_buf,
245                                   sizeof(deref_name_buf) - 1);
246         if (deref_name_len >= 0) {
247                 deref_name_buf[deref_name_len] = '\0';
248                 DEBUG("Read symlink `%s'", deref_name_buf);
249                 ret = inode_set_symlink(inode, deref_name_buf,
250                                         lookup_table, NULL);
251                 if (ret == 0) {
252                         /* Unfortunately, Windows seems to have the concept of
253                          * "file" symbolic links as being different from
254                          * "directory" symbolic links...  so
255                          * FILE_ATTRIBUTE_DIRECTORY needs to be set on the
256                          * symbolic link if the *target* of the symbolic link is
257                          * a directory.  */
258                         struct stat stbuf;
259                         if (stat(path, &stbuf) == 0 && S_ISDIR(stbuf.st_mode))
260                                 inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
261                 }
262         } else {
263                 ERROR_WITH_ERRNO("Failed to read target of "
264                                  "symbolic link `%s'", path);
265                 ret = WIMLIB_ERR_READLINK;
266         }
267         return ret;
268 }
269
270 static int
271 unix_build_dentry_tree_recursive(struct wim_dentry **root_ret,
272                                  char *path,
273                                  size_t path_len,
274                                  struct wim_lookup_table *lookup_table,
275                                  const struct capture_config *config,
276                                  int add_image_flags,
277                                  wimlib_progress_func_t progress_func)
278 {
279         struct wim_dentry *root = NULL;
280         int ret = 0;
281         struct wim_inode *inode;
282
283         if (exclude_path(path, path_len, config, true)) {
284                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) {
285                         ERROR("Cannot exclude the root directory from capture");
286                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
287                         goto out;
288                 }
289                 if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_EXCLUDE_VERBOSE)
290                     && progress_func)
291                 {
292                         union wimlib_progress_info info;
293                         info.scan.cur_path = path;
294                         info.scan.excluded = true;
295                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
296                 }
297                 goto out;
298         }
299
300         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
301             && progress_func)
302         {
303                 union wimlib_progress_info info;
304                 info.scan.cur_path = path;
305                 info.scan.excluded = false;
306                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
307         }
308
309         /* UNIX version of capturing a directory tree */
310         struct stat stbuf;
311         int (*stat_fn)(const char *restrict, struct stat *restrict);
312         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE)
313                 stat_fn = stat;
314         else
315                 stat_fn = lstat;
316
317         ret = (*stat_fn)(path, &stbuf);
318         if (ret != 0) {
319                 ERROR_WITH_ERRNO("Failed to stat `%s'", path);
320                 goto out;
321         }
322
323         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) &&
324               !S_ISDIR(stbuf.st_mode))
325         {
326                 /* Do a dereference-stat in case the root is a symbolic link.
327                  * This case is allowed, provided that the symbolic link points
328                  * to a directory. */
329                 ret = stat(path, &stbuf);
330                 if (ret != 0) {
331                         ERROR_WITH_ERRNO("Failed to stat `%s'", path);
332                         ret = WIMLIB_ERR_STAT;
333                         goto out;
334                 }
335                 if (!S_ISDIR(stbuf.st_mode)) {
336                         ERROR("`%s' is not a directory", path);
337                         ret = WIMLIB_ERR_NOTDIR;
338                         goto out;
339                 }
340         }
341         if (!S_ISREG(stbuf.st_mode) && !S_ISDIR(stbuf.st_mode)
342             && !S_ISLNK(stbuf.st_mode)) {
343                 ERROR("`%s' is not a regular file, directory, or symbolic link.",
344                       path);
345                 ret = WIMLIB_ERR_SPECIAL_FILE;
346                 goto out;
347         }
348
349         ret = new_dentry_with_timeless_inode(path_basename_with_len(path, path_len),
350                                              &root);
351         if (ret)
352                 goto out;
353
354         inode = root->d_inode;
355
356 #ifdef HAVE_STAT_NANOSECOND_PRECISION
357         inode->i_creation_time = timespec_to_wim_timestamp(stbuf.st_mtim);
358         inode->i_last_write_time = timespec_to_wim_timestamp(stbuf.st_mtim);
359         inode->i_last_access_time = timespec_to_wim_timestamp(stbuf.st_atim);
360 #else
361         inode->i_creation_time = unix_timestamp_to_wim(stbuf.st_mtime);
362         inode->i_last_write_time = unix_timestamp_to_wim(stbuf.st_mtime);
363         inode->i_last_access_time = unix_timestamp_to_wim(stbuf.st_atime);
364 #endif
365         /* Leave the inode number at 0 for directories.  Otherwise grab the
366          * inode number from the `stat' buffer, including the device number if
367          * possible. */
368         if (!S_ISDIR(stbuf.st_mode)) {
369                 if (sizeof(ino_t) >= 8)
370                         inode->i_ino = (u64)stbuf.st_ino;
371                 else
372                         inode->i_ino = (u64)stbuf.st_ino |
373                                            ((u64)stbuf.st_dev <<
374                                                 ((sizeof(ino_t) * 8) & 63));
375         }
376         inode->i_resolved = 1;
377         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
378                 ret = inode_set_unix_data(inode, stbuf.st_uid,
379                                           stbuf.st_gid,
380                                           stbuf.st_mode,
381                                           lookup_table,
382                                           UNIX_DATA_ALL | UNIX_DATA_CREATE);
383                 if (ret)
384                         goto out;
385         }
386         add_image_flags &= ~(WIMLIB_ADD_IMAGE_FLAG_ROOT | WIMLIB_ADD_IMAGE_FLAG_SOURCE);
387         if (S_ISREG(stbuf.st_mode))
388                 ret = unix_capture_regular_file(path, stbuf.st_size,
389                                                 inode, lookup_table);
390         else if (S_ISDIR(stbuf.st_mode))
391                 ret = unix_capture_directory(root, path, path_len,
392                                              lookup_table, config,
393                                              add_image_flags, progress_func);
394         else
395                 ret = unix_capture_symlink(path, inode, lookup_table);
396 out:
397         if (ret == 0)
398                 *root_ret = root;
399         else
400                 free_dentry_tree(root, lookup_table);
401         return ret;
402 }
403
404 /*
405  * unix_build_dentry_tree():
406  *      Builds a tree of WIM dentries from an on-disk directory tree (UNIX
407  *      version; no NTFS-specific data is captured).
408  *
409  * @root_ret:   Place to return a pointer to the root of the dentry tree.  Only
410  *              modified if successful.  Set to NULL if the file or directory was
411  *              excluded from capture.
412  *
413  * @root_disk_path:  The path to the root of the directory tree on disk.
414  *
415  * @lookup_table: The lookup table for the WIM file.  For each file added to the
416  *              dentry tree being built, an entry is added to the lookup table,
417  *              unless an identical stream is already in the lookup table.
418  *              These lookup table entries that are added point to the path of
419  *              the file on disk.
420  *
421  * @sd_set:     Ignored.  (Security data only captured in NTFS mode.)
422  *
423  * @capture_config:
424  *              Configuration for files to be excluded from capture.
425  *
426  * @add_flags:  Bitwise or of WIMLIB_ADD_IMAGE_FLAG_*
427  *
428  * @extra_arg:  Ignored
429  *
430  * @return:     0 on success, nonzero on failure.  It is a failure if any of
431  *              the files cannot be `stat'ed, or if any of the needed
432  *              directories cannot be opened or read.  Failure to add the files
433  *              to the WIM may still occur later when trying to actually read
434  *              the on-disk files during a call to wimlib_write() or
435  *              wimlib_overwrite().
436  */
437 static int
438 unix_build_dentry_tree(struct wim_dentry **root_ret,
439                        const char *root_disk_path,
440                        struct wim_lookup_table *lookup_table,
441                        struct sd_set *sd_set,
442                        const struct capture_config *config,
443                        int add_image_flags,
444                        wimlib_progress_func_t progress_func,
445                        void *extra_arg)
446 {
447         char *path_buf;
448         int ret;
449         size_t path_len;
450         size_t path_bufsz;
451
452         path_bufsz = min(32790, PATH_MAX + 1);
453         path_len = strlen(root_disk_path);
454
455         if (path_len >= path_bufsz)
456                 return WIMLIB_ERR_INVALID_PARAM;
457
458         path_buf = MALLOC(path_bufsz);
459         if (!path_buf)
460                 return WIMLIB_ERR_NOMEM;
461         memcpy(path_buf, root_disk_path, path_len + 1);
462         ret = unix_build_dentry_tree_recursive(root_ret,
463                                                path_buf,
464                                                path_len,
465                                                lookup_table,
466                                                config,
467                                                add_image_flags,
468                                                progress_func);
469         FREE(path_buf);
470         return ret;
471 }
472 #endif /* !__WIN32__ */
473
/* Section of the capture configuration file that a pattern line belongs to. */
enum pattern_type {
        NONE                       = 0, /* No section header seen yet */
        EXCLUSION_LIST             = 1, /* [ExclusionList] */
        EXCLUSION_EXCEPTION        = 2, /* [ExclusionException] */
        COMPRESSION_EXCLUSION_LIST = 3, /* [CompressionExclusionList] */
        ALIGNMENT_LIST             = 4, /* [AlignmentList] */
};
481
482 #define COMPAT_DEFAULT_CONFIG
483
484 /* Default capture configuration file when none is specified. */
485 static const tchar *default_config =
486 #ifdef COMPAT_DEFAULT_CONFIG /* XXX: This policy is being moved to library
487                                 users.  The next ABI-incompatible library
488                                 version will default to the empty string here. */
489 T(
490 "[ExclusionList]\n"
491 "\\$ntfs.log\n"
492 "\\hiberfil.sys\n"
493 "\\pagefile.sys\n"
494 "\\System Volume Information\n"
495 "\\RECYCLER\n"
496 "\\Windows\\CSC\n"
497 );
498 #else
499 T("");
500 #endif
501
502 static void
503 destroy_pattern_list(struct pattern_list *list)
504 {
505         FREE(list->pats);
506 }
507
508 static void
509 destroy_capture_config(struct capture_config *config)
510 {
511         destroy_pattern_list(&config->exclusion_list);
512         destroy_pattern_list(&config->exclusion_exception);
513         destroy_pattern_list(&config->compression_exclusion_list);
514         destroy_pattern_list(&config->alignment_list);
515         FREE(config->config_str);
516         memset(config, 0, sizeof(*config));
517 }
518
519 static int
520 pattern_list_add_pattern(struct pattern_list *list, const tchar *pattern)
521 {
522         const tchar **pats;
523         if (list->num_pats >= list->num_allocated_pats) {
524                 pats = REALLOC(list->pats,
525                                sizeof(list->pats[0]) * (list->num_allocated_pats + 8));
526                 if (!pats)
527                         return WIMLIB_ERR_NOMEM;
528                 list->num_allocated_pats += 8;
529                 list->pats = pats;
530         }
531         list->pats[list->num_pats++] = pattern;
532         return 0;
533 }
534
535 /* Parses the contents of the image capture configuration file and fills in a
536  * `struct capture_config'. */
537 static int
538 init_capture_config(struct capture_config *config,
539                     const tchar *_config_str,
540                     size_t config_num_tchars)
541 {
542         tchar *config_str;
543         tchar *p;
544         tchar *eol;
545         tchar *next_p;
546         size_t num_tchars_remaining;
547         enum pattern_type type = NONE;
548         int ret;
549         unsigned long line_no = 0;
550
551         DEBUG("config_num_tchars = %zu", config_num_tchars);
552         num_tchars_remaining = config_num_tchars;
553         memset(config, 0, sizeof(*config));
554         config_str = TMALLOC(config_num_tchars);
555         if (!config_str) {
556                 ERROR("Could not duplicate capture config string");
557                 return WIMLIB_ERR_NOMEM;
558         }
559
560         tmemcpy(config_str, _config_str, config_num_tchars);
561         next_p = config_str;
562         config->config_str = config_str;
563         while (num_tchars_remaining != 0) {
564                 line_no++;
565                 p = next_p;
566                 eol = tmemchr(p, T('\n'), num_tchars_remaining);
567                 if (!eol) {
568                         ERROR("Expected end-of-line in capture config file on "
569                               "line %lu", line_no);
570                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
571                         goto out_destroy;
572                 }
573
574                 next_p = eol + 1;
575                 num_tchars_remaining -= (next_p - p);
576                 if (eol == p)
577                         continue;
578
579                 if (*(eol - 1) == T('\r'))
580                         eol--;
581                 *eol = T('\0');
582
583                 /* Translate backslash to forward slash */
584                 for (tchar *pp = p; pp != eol; pp++)
585                         if (*pp == T('\\'))
586                                 *pp = T('/');
587
588                 /* Check if the path begins with a drive letter */
589                 if (eol - p > 2 && *p != T('/') && *(p + 1) == T(':')) {
590                         /* Don't allow relative paths on other drives */
591                         if (eol - p < 3 || *(p + 2) != T('/')) {
592                                 ERROR("Relative paths including a drive letter "
593                                       "are not allowed!\n"
594                                       "        Perhaps you meant "
595                                       "\"%"TS":/%"TS"\"?\n",
596                                       *p, p + 2);
597                                 ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
598                                 goto out_destroy;
599                         }
600                 #ifndef __WIN32__
601                         /* UNIX: strip the drive letter */
602                         p += 2;
603                 #endif
604                 }
605
606                 ret = 0;
607                 if (!tstrcmp(p, T("[ExclusionList]")))
608                         type = EXCLUSION_LIST;
609                 else if (!tstrcmp(p, T("[ExclusionException]")))
610                         type = EXCLUSION_EXCEPTION;
611                 else if (!tstrcmp(p, T("[CompressionExclusionList]")))
612                         type = COMPRESSION_EXCLUSION_LIST;
613                 else if (!tstrcmp(p, T("[AlignmentList]")))
614                         type = ALIGNMENT_LIST;
615                 else if (p[0] == T('[') && tstrrchr(p, T(']'))) {
616                         ERROR("Unknown capture configuration section \"%"TS"\"", p);
617                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
618                 } else switch (type) {
619                 case EXCLUSION_LIST:
620                         DEBUG("Adding pattern \"%"TS"\" to exclusion list", p);
621                         ret = pattern_list_add_pattern(&config->exclusion_list, p);
622                         break;
623                 case EXCLUSION_EXCEPTION:
624                         DEBUG("Adding pattern \"%"TS"\" to exclusion exception list", p);
625                         ret = pattern_list_add_pattern(&config->exclusion_exception, p);
626                         break;
627                 case COMPRESSION_EXCLUSION_LIST:
628                         DEBUG("Adding pattern \"%"TS"\" to compression exclusion list", p);
629                         ret = pattern_list_add_pattern(&config->compression_exclusion_list, p);
630                         break;
631                 case ALIGNMENT_LIST:
632                         DEBUG("Adding pattern \"%"TS"\" to alignment list", p);
633                         ret = pattern_list_add_pattern(&config->alignment_list, p);
634                         break;
635                 default:
636                         ERROR("Line %lu of capture configuration is not "
637                               "in a block (such as [ExclusionList])",
638                               line_no);
639                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
640                         break;
641                 }
642                 if (ret != 0)
643                         goto out_destroy;
644         }
645         return 0;
646 out_destroy:
647         destroy_capture_config(config);
648         return ret;
649 }
650
651 static bool
652 is_absolute_path(const tchar *path)
653 {
654         if (*path == T('/'))
655                 return true;
656 #ifdef __WIN32__
657         /* Drive letter */
658         if (*path && *(path + 1) == T(':'))
659                 return true;
660 #endif
661         return false;
662 }
663
664 static bool
665 match_pattern(const tchar *path,
666               const tchar *path_basename,
667               const struct pattern_list *list)
668 {
669         for (size_t i = 0; i < list->num_pats; i++) {
670                 const tchar *pat = list->pats[i];
671                 const tchar *string;
672                 if (is_absolute_path(pat)) {
673                         /* Absolute path from root of capture */
674                         string = path;
675                 } else {
676                         if (tstrchr(pat, T('/')))
677                                 /* Relative path from root of capture */
678                                 string = path + 1;
679                         else
680                                 /* A file name pattern */
681                                 string = path_basename;
682                 }
683
684                 /* Warning: on Windows native builds, fnmatch() calls the
685                  * replacement function in win32.c. */
686                 if (fnmatch(pat, string, FNM_PATHNAME
687                                 #ifdef FNM_CASEFOLD
688                                         | FNM_CASEFOLD
689                                 #endif
690                             ) == 0)
691                 {
692                         DEBUG("\"%"TS"\" matches the pattern \"%"TS"\"",
693                               string, pat);
694                         return true;
695                 }
696         }
697         return false;
698 }
699
700 /* Return true if the image capture configuration file indicates we should
701  * exclude the filename @path from capture.
702  *
703  * If @exclude_prefix is %true, the part of the path up and including the name
704  * of the directory being captured is not included in the path for matching
705  * purposes.  This allows, for example, a pattern like /hiberfil.sys to match a
706  * file /mnt/windows7/hiberfil.sys if we are capturing the /mnt/windows7
707  * directory.
708  */
709 bool
710 exclude_path(const tchar *path, size_t path_len,
711              const struct capture_config *config, bool exclude_prefix)
712 {
713         const tchar *basename = path_basename_with_len(path, path_len);
714         if (exclude_prefix) {
715                 wimlib_assert(path_len >= config->prefix_num_tchars);
716                 if (!tmemcmp(config->prefix, path, config->prefix_num_tchars) &&
717                     path[config->prefix_num_tchars] == T('/'))
718                 {
719                         path += config->prefix_num_tchars;
720                 }
721         }
722         return match_pattern(path, basename, &config->exclusion_list) &&
723                 !match_pattern(path, basename, &config->exclusion_exception);
724
725 }
726
727 /* Strip leading and trailing forward slashes from a string.  Modifies it in
728  * place and returns the stripped string. */
729 static const tchar *
730 canonicalize_target_path(tchar *target_path)
731 {
732         tchar *p;
733         if (target_path == NULL)
734                 return T("");
735         for (;;) {
736                 if (*target_path == T('\0'))
737                         return target_path;
738                 else if (*target_path == T('/'))
739                         target_path++;
740                 else
741                         break;
742         }
743
744         p = tstrchr(target_path, T('\0')) - 1;
745         while (*p == T('/'))
746                 *p-- = T('\0');
747         return target_path;
748 }
749
750 /* Strip leading and trailing slashes from the target paths, and translate all
751  * backslashes in the source and target paths into forward slashes. */
752 static void
753 canonicalize_sources_and_targets(struct wimlib_capture_source *sources,
754                                  size_t num_sources)
755 {
756         while (num_sources--) {
757                 DEBUG("Canonicalizing { source: \"%"TS"\", target=\"%"TS"\"}",
758                       sources->fs_source_path,
759                       sources->wim_target_path);
760
761                 /* The Windows API can handle forward slashes.  Just get rid of
762                  * backslashes to avoid confusing other parts of the library
763                  * code. */
764                 zap_backslashes(sources->fs_source_path);
765                 if (sources->wim_target_path)
766                         zap_backslashes(sources->wim_target_path);
767
768                 sources->wim_target_path =
769                         (tchar*)canonicalize_target_path(sources->wim_target_path);
770                 DEBUG("Canonical target: \"%"TS"\"", sources->wim_target_path);
771                 sources++;
772         }
773 }
774
775 static int
776 capture_source_cmp(const void *p1, const void *p2)
777 {
778         const struct wimlib_capture_source *s1 = p1, *s2 = p2;
779         return tstrcmp(s1->wim_target_path, s2->wim_target_path);
780 }
781
782 /* Sorts the capture sources lexicographically by target path.  This occurs
783  * after leading and trailing forward slashes are stripped.
784  *
785  * One purpose of this is to make sure that target paths that are inside other
786  * target paths are added after the containing target paths. */
787 static void
788 sort_sources(struct wimlib_capture_source *sources, size_t num_sources)
789 {
790         qsort(sources, num_sources, sizeof(sources[0]), capture_source_cmp);
791 }
792
793 static int
794 check_sorted_sources(struct wimlib_capture_source *sources, size_t num_sources,
795                      int add_image_flags)
796 {
797         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
798                 if (num_sources != 1) {
799                         ERROR("Must specify exactly 1 capture source "
800                               "(the NTFS volume) in NTFS mode!");
801                         return WIMLIB_ERR_INVALID_PARAM;
802                 }
803                 if (sources[0].wim_target_path[0] != T('\0')) {
804                         ERROR("In NTFS capture mode the target path inside "
805                               "the image must be the root directory!");
806                         return WIMLIB_ERR_INVALID_PARAM;
807                 }
808         } else if (num_sources != 0) {
809                 /* This code is disabled because the current code
810                  * unconditionally attempts to do overlays.  So, duplicate
811                  * target paths are OK. */
812         #if 0
813                 if (num_sources > 1 && sources[0].wim_target_path[0] == '\0') {
814                         ERROR("Cannot specify root target when using multiple "
815                               "capture sources!");
816                         return WIMLIB_ERR_INVALID_PARAM;
817                 }
818                 for (size_t i = 0; i < num_sources - 1; i++) {
819                         size_t len = strlen(sources[i].wim_target_path);
820                         size_t j = i + 1;
821                         const char *target1 = sources[i].wim_target_path;
822                         do {
823                                 const char *target2 = sources[j].wim_target_path;
824                                 DEBUG("target1=%s, target2=%s",
825                                       target1,target2);
826                                 if (strncmp(target1, target2, len) ||
827                                     target2[len] > '/')
828                                         break;
829                                 if (target2[len] == '/') {
830                                         ERROR("Invalid target `%s': is a prefix of `%s'",
831                                               target1, target2);
832                                         return WIMLIB_ERR_INVALID_PARAM;
833                                 }
834                                 if (target2[len] == '\0') {
835                                         ERROR("Invalid target `%s': is a duplicate of `%s'",
836                                               target1, target2);
837                                         return WIMLIB_ERR_INVALID_PARAM;
838                                 }
839                         } while (++j != num_sources);
840                 }
841         #endif
842         }
843         return 0;
844
845 }
846
847 /* Creates a new directory to place in the WIM image.  This is to create parent
848  * directories that are not part of any target as needed.  */
849 static int
850 new_filler_directory(const tchar *name, struct wim_dentry **dentry_ret)
851 {
852         int ret;
853         struct wim_dentry *dentry;
854
855         DEBUG("Creating filler directory \"%"TS"\"", name);
856         ret = new_dentry_with_inode(name, &dentry);
857         if (ret == 0) {
858                 /* Leave the inode number as 0 for now.  The final inode number
859                  * will be assigned later by assign_inode_numbers(). */
860                 dentry->d_inode->i_resolved = 1;
861                 dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
862                 *dentry_ret = dentry;
863         }
864         return ret;
865 }
866
867 /* Transfers the children of @branch to @target.  It is an error if @target is
868  * not a directory or if both @branch and @target contain a child dentry with
869  * the same name. */
870 static int
871 do_overlay(struct wim_dentry *target, struct wim_dentry *branch)
872 {
873         struct rb_root *rb_root;
874
875         DEBUG("Doing overlay \"%"WS"\" => \"%"WS"\"",
876               branch->file_name, target->file_name);
877
878         if (!dentry_is_directory(target)) {
879                 ERROR("Cannot overlay directory \"%"WS"\" "
880                       "over non-directory", branch->file_name);
881                 return WIMLIB_ERR_INVALID_OVERLAY;
882         }
883
884         rb_root = &branch->d_inode->i_children;
885         while (rb_root->rb_node) { /* While @branch has children... */
886                 struct wim_dentry *child = rbnode_dentry(rb_root->rb_node);
887                 /* Move @child to the directory @target */
888                 unlink_dentry(child);
889                 if (!dentry_add_child(target, child)) {
890                         /* Revert the change to avoid leaking the directory tree
891                          * rooted at @child */
892                         dentry_add_child(branch, child);
893                         ERROR("Overlay error: file \"%"WS"\" already exists "
894                               "as a child of \"%"WS"\"",
895                               child->file_name, target->file_name);
896                         return WIMLIB_ERR_INVALID_OVERLAY;
897                 }
898         }
899         free_dentry(branch);
900         return 0;
901
902 }
903
904 /* Attach or overlay a branch onto the WIM image.
905  *
906  * @root_p:
907  *      Pointer to the root of the WIM image, or pointer to NULL if it has not
908  *      been created yet.
909  * @branch
910  *      Branch to add.
911  * @target_path:
912  *      Path in the WIM image to add the branch, with leading and trailing
913  *      slashes stripped.
914  */
915 static int
916 attach_branch(struct wim_dentry **root_p, struct wim_dentry *branch,
917               tchar *target_path)
918 {
919         tchar *slash;
920         struct wim_dentry *dentry, *parent, *target;
921         int ret;
922
923         DEBUG("Attaching branch \"%"WS"\" => \"%"TS"\"",
924               branch->file_name, target_path);
925
926         if (*target_path == T('\0')) {
927                 /* Target: root directory */
928                 if (*root_p) {
929                         /* Overlay on existing root */
930                         return do_overlay(*root_p, branch);
931                 } else  {
932                         /* Set as root */
933                         *root_p = branch;
934                         return 0;
935                 }
936         }
937
938         /* Adding a non-root branch.  Create root if it hasn't been created
939          * already. */
940         if (!*root_p) {
941                 ret  = new_filler_directory(T(""), root_p);
942                 if (ret)
943                         return ret;
944         }
945
946         /* Walk the path to the branch, creating filler directories as needed.
947          * */
948         parent = *root_p;
949         while ((slash = tstrchr(target_path, T('/')))) {
950                 *slash = T('\0');
951                 dentry = get_dentry_child_with_name(parent, target_path);
952                 if (!dentry) {
953                         ret = new_filler_directory(target_path, &dentry);
954                         if (ret)
955                                 return ret;
956                         dentry_add_child(parent, dentry);
957                 }
958                 parent = dentry;
959                 target_path = slash;
960                 /* Skip over slashes.  Note: this cannot overrun the length of
961                  * the string because the last character cannot be a slash, as
962                  * trailing slashes were tripped.  */
963                 do {
964                         ++target_path;
965                 } while (*target_path == T('/'));
966         }
967
968         /* If the target path already existed, overlay the branch onto it.
969          * Otherwise, set the branch as the target path. */
970         target = get_dentry_child_with_utf16le_name(parent, branch->file_name,
971                                                     branch->file_name_nbytes);
972         if (target) {
973                 return do_overlay(target, branch);
974         } else {
975                 dentry_add_child(parent, branch);
976                 return 0;
977         }
978 }
979
980 WIMLIBAPI int
981 wimlib_add_image_multisource(WIMStruct *w,
982                              struct wimlib_capture_source *sources,
983                              size_t num_sources,
984                              const tchar *name,
985                              const tchar *config_str,
986                              size_t config_len,
987                              int add_image_flags,
988                              wimlib_progress_func_t progress_func)
989 {
990         int (*capture_tree)(struct wim_dentry **,
991                             const tchar *,
992                             struct wim_lookup_table *,
993                             struct sd_set *,
994                             const struct capture_config *,
995                             int,
996                             wimlib_progress_func_t,
997                             void *);
998         void *extra_arg;
999         struct wim_dentry *root_dentry;
1000         struct wim_dentry *branch;
1001         struct wim_security_data *sd;
1002         struct capture_config config;
1003         struct wim_image_metadata *imd;
1004         int ret;
1005         struct sd_set sd_set;
1006
1007         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
1008 #ifdef WITH_NTFS_3G
1009                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
1010                         ERROR("Cannot dereference files when capturing directly from NTFS");
1011                         return WIMLIB_ERR_INVALID_PARAM;
1012                 }
1013                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
1014                         ERROR("Capturing UNIX owner and mode not supported "
1015                               "when capturing directly from NTFS");
1016                         return WIMLIB_ERR_INVALID_PARAM;
1017                 }
1018                 capture_tree = build_dentry_tree_ntfs;
1019                 extra_arg = &w->ntfs_vol;
1020 #else
1021                 ERROR("wimlib was compiled without support for NTFS-3g, so\n"
1022                       "        cannot capture a WIM image directly from a NTFS volume!");
1023                 return WIMLIB_ERR_UNSUPPORTED;
1024 #endif
1025         } else {
1026         #ifdef __WIN32__
1027                 capture_tree = win32_build_dentry_tree;
1028         #else
1029                 capture_tree = unix_build_dentry_tree;
1030         #endif
1031                 extra_arg = NULL;
1032         }
1033
1034 #ifdef __WIN32__
1035         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
1036                 ERROR("Capturing UNIX-specific data is not supported on Windows");
1037                 return WIMLIB_ERR_INVALID_PARAM;
1038         }
1039         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
1040                 ERROR("Dereferencing symbolic links is not supported on Windows");
1041                 return WIMLIB_ERR_INVALID_PARAM;
1042         }
1043 #endif
1044
1045         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
1046                 add_image_flags |= WIMLIB_ADD_IMAGE_FLAG_EXCLUDE_VERBOSE;
1047
1048         if (!name || !*name) {
1049                 ERROR("Must specify a non-empty string for the image name");
1050                 return WIMLIB_ERR_INVALID_PARAM;
1051         }
1052
1053         if (w->hdr.total_parts != 1) {
1054                 ERROR("Cannot add an image to a split WIM");
1055                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
1056         }
1057
1058         if (wimlib_image_name_in_use(w, name)) {
1059                 ERROR("There is already an image named \"%"TS"\" in the WIM!",
1060                       name);
1061                 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
1062         }
1063
1064         if (!config_str) {
1065                 DEBUG("Using default capture configuration");
1066                 config_str = default_config;
1067                 config_len = tstrlen(default_config);
1068         }
1069         ret = init_capture_config(&config, config_str, config_len);
1070         if (ret)
1071                 goto out;
1072
1073         DEBUG("Allocating security data");
1074         sd = CALLOC(1, sizeof(struct wim_security_data));
1075         if (!sd) {
1076                 ret = WIMLIB_ERR_NOMEM;
1077                 goto out_destroy_capture_config;
1078         }
1079         sd->total_length = 8;
1080         sd->refcnt = 1;
1081
1082         sd_set.sd = sd;
1083         sd_set.rb_root.rb_node = NULL;
1084
1085         DEBUG("Using %zu capture sources", num_sources);
1086         canonicalize_sources_and_targets(sources, num_sources);
1087         sort_sources(sources, num_sources);
1088         ret = check_sorted_sources(sources, num_sources, add_image_flags);
1089         if (ret) {
1090                 ret = WIMLIB_ERR_INVALID_PARAM;
1091                 goto out_free_security_data;
1092         }
1093
1094         DEBUG("Building dentry tree.");
1095         root_dentry = NULL;
1096
1097         for (size_t i = 0; i < num_sources; i++) {
1098                 int flags;
1099                 union wimlib_progress_info progress;
1100
1101                 DEBUG("Building dentry tree for source %zu of %zu "
1102                       "(\"%"TS"\" => \"%"TS"\")", i + 1, num_sources,
1103                       sources[i].fs_source_path,
1104                       sources[i].wim_target_path);
1105                 if (progress_func) {
1106                         memset(&progress, 0, sizeof(progress));
1107                         progress.scan.source = sources[i].fs_source_path;
1108                         progress.scan.wim_target_path = sources[i].wim_target_path;
1109                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_BEGIN, &progress);
1110                 }
1111                 config.prefix = sources[i].fs_source_path;
1112                 config.prefix_num_tchars = tstrlen(sources[i].fs_source_path);
1113                 flags = add_image_flags | WIMLIB_ADD_IMAGE_FLAG_SOURCE;
1114                 if (!*sources[i].wim_target_path)
1115                         flags |= WIMLIB_ADD_IMAGE_FLAG_ROOT;
1116                 ret = (*capture_tree)(&branch,
1117                                       sources[i].fs_source_path,
1118                                       w->lookup_table,
1119                                       &sd_set,
1120                                       &config,
1121                                       flags,
1122                                       progress_func, extra_arg);
1123                 if (ret) {
1124                         ERROR("Failed to build dentry tree for `%"TS"'",
1125                               sources[i].fs_source_path);
1126                         goto out_free_dentry_tree;
1127                 }
1128                 if (branch) {
1129                         /* Use the target name, not the source name, for
1130                          * the root of each branch from a capture
1131                          * source.  (This will also set the root dentry
1132                          * of the entire image to be unnamed.) */
1133                         ret = set_dentry_name(branch,
1134                                               path_basename(sources[i].wim_target_path));
1135                         if (ret)
1136                                 goto out_free_branch;
1137
1138                         ret = attach_branch(&root_dentry, branch,
1139                                             sources[i].wim_target_path);
1140                         if (ret)
1141                                 goto out_free_branch;
1142                 }
1143                 if (progress_func)
1144                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_END, &progress);
1145         }
1146
1147         if (root_dentry == NULL) {
1148                 ret = new_filler_directory(T(""), &root_dentry);
1149                 if (ret)
1150                         goto out_free_dentry_tree;
1151         }
1152
1153         DEBUG("Calculating full paths of dentries.");
1154         ret = for_dentry_in_tree(root_dentry, calculate_dentry_full_path, NULL);
1155         if (ret)
1156                 goto out_free_dentry_tree;
1157
1158         ret = add_new_dentry_tree(w, root_dentry, sd);
1159         if (ret)
1160                 goto out_free_dentry_tree;
1161
1162         imd = &w->image_metadata[w->hdr.image_count - 1];
1163
1164         ret = dentry_tree_fix_inodes(root_dentry, &imd->inode_list);
1165         if (ret)
1166                 goto out_destroy_imd;
1167
1168         DEBUG("Assigning hard link group IDs");
1169         assign_inode_numbers(&imd->inode_list);
1170
1171         ret = xml_add_image(w, name);
1172         if (ret)
1173                 goto out_destroy_imd;
1174
1175         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_BOOT)
1176                 wimlib_set_boot_idx(w, w->hdr.image_count);
1177         ret = 0;
1178         goto out_destroy_sd_set;
1179 out_destroy_imd:
1180         destroy_image_metadata(&w->image_metadata[w->hdr.image_count - 1],
1181                                w->lookup_table);
1182         w->hdr.image_count--;
1183         goto out_destroy_sd_set;
1184 out_free_branch:
1185         free_dentry_tree(branch, w->lookup_table);
1186 out_free_dentry_tree:
1187         free_dentry_tree(root_dentry, w->lookup_table);
1188 out_free_security_data:
1189         free_security_data(sd);
1190 out_destroy_sd_set:
1191         destroy_sd_set(&sd_set);
1192 out_destroy_capture_config:
1193         destroy_capture_config(&config);
1194 out:
1195         return ret;
1196 }
1197
1198 WIMLIBAPI int
1199 wimlib_add_image(WIMStruct *w,
1200                  const tchar *source,
1201                  const tchar *name,
1202                  const tchar *config_str,
1203                  size_t config_len,
1204                  int add_image_flags,
1205                  wimlib_progress_func_t progress_func)
1206 {
1207         if (!source || !*source)
1208                 return WIMLIB_ERR_INVALID_PARAM;
1209
1210         tchar *fs_source_path = TSTRDUP(source);
1211         int ret;
1212         struct wimlib_capture_source capture_src = {
1213                 .fs_source_path = fs_source_path,
1214                 .wim_target_path = NULL,
1215                 .reserved = 0,
1216         };
1217         ret = wimlib_add_image_multisource(w, &capture_src, 1, name,
1218                                            config_str, config_len,
1219                                            add_image_flags, progress_func);
1220         FREE(fs_source_path);
1221         return ret;
1222 }