]> wimlib.net Git - wimlib/blob - src/add_image.c
Improve char encoding support (IN PROGRESS)
[wimlib] / src / add_image.c
1 /*
2  * add_image.c
3  */
4
5 /*
6  * Copyright (C) 2012, 2013 Eric Biggers
7  *
8  * This file is part of wimlib, a library for working with WIM files.
9  *
10  * wimlib is free software; you can redistribute it and/or modify it under the
11  * terms of the GNU General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option)
13  * any later version.
14  *
15  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17  * A PARTICULAR PURPOSE. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with wimlib; if not, see http://www.gnu.org/licenses/.
22  */
23
24 #include "config.h"
25
26 #ifdef __WIN32__
27 #  include "win32.h"
28 #else
29 #  include <dirent.h>
30 #  include <sys/stat.h>
31 #  include <fnmatch.h>
32 #  include "timestamp.h"
33 #endif
34
35 #include "wimlib_internal.h"
36 #include "dentry.h"
37 #include "lookup_table.h"
38 #include "xml.h"
39
40 #include <ctype.h>
41 #include <errno.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <unistd.h>
45
46 #ifdef HAVE_ALLOCA_H
47 #  include <alloca.h>
48 #endif
49
50 /*
51  * Adds the dentry tree and security data for a new image to the image metadata
52  * array of the WIMStruct.
53  */
54 int
55 add_new_dentry_tree(WIMStruct *w, struct wim_dentry *root_dentry,
56                     struct wim_security_data *sd)
57 {
58         struct wim_lookup_table_entry *metadata_lte;
59         struct wim_image_metadata *imd;
60         struct wim_image_metadata *new_imd;
61
62         wimlib_assert(root_dentry != NULL);
63
64         DEBUG("Reallocating image metadata array for image_count = %u",
65               w->hdr.image_count + 1);
66         imd = CALLOC((w->hdr.image_count + 1), sizeof(struct wim_image_metadata));
67
68         if (!imd) {
69                 ERROR("Failed to allocate memory for new image metadata array");
70                 goto err;
71         }
72
73         memcpy(imd, w->image_metadata,
74                w->hdr.image_count * sizeof(struct wim_image_metadata));
75
76         metadata_lte = new_lookup_table_entry();
77         if (!metadata_lte)
78                 goto err_free_imd;
79
80         metadata_lte->resource_entry.flags = WIM_RESHDR_FLAG_METADATA;
81         random_hash(metadata_lte->hash);
82         lookup_table_insert(w->lookup_table, metadata_lte);
83
84         new_imd = &imd[w->hdr.image_count];
85
86         new_imd->root_dentry    = root_dentry;
87         new_imd->metadata_lte   = metadata_lte;
88         new_imd->security_data  = sd;
89         new_imd->modified       = 1;
90
91         FREE(w->image_metadata);
92         w->image_metadata = imd;
93         w->hdr.image_count++;
94         return 0;
95 err_free_imd:
96         FREE(imd);
97 err:
98         return WIMLIB_ERR_NOMEM;
99
100 }
101
102 #ifndef __WIN32__
103 /*
104  * build_dentry_tree():
105  *      Recursively builds a tree of WIM dentries from an on-disk directory
106  *      tree.
107  *
108  * @root_ret:   Place to return a pointer to the root of the dentry tree.  Only
109  *              modified if successful.  Set to NULL if the file or directory was
110  *              excluded from capture.
111  *
112  * @root_disk_path:  The path to the root of the directory tree on disk (UTF-8).
113  *
114  * @lookup_table: The lookup table for the WIM file.  For each file added to the
115  *              dentry tree being built, an entry is added to the lookup table,
116  *              unless an identical stream is already in the lookup table.
117  *              These lookup table entries that are added point to the path of
118  *              the file on disk.
119  *
120  * @sd:         Ignored.  (Security data only captured in NTFS mode.)
121  *
122  * @capture_config:
123  *              Configuration for files to be excluded from capture.
124  *
125  * @add_flags:  Bitwise or of WIMLIB_ADD_IMAGE_FLAG_*
126  *
127  * @extra_arg:  Ignored in UNIX builds; used to pass sd_set pointer in Windows
128  *              builds.
129  *
130  * @return:     0 on success, nonzero on failure.  It is a failure if any of
131  *              the files cannot be `stat'ed, or if any of the needed
132  *              directories cannot be opened or read.  Failure to add the files
133  *              to the WIM may still occur later when trying to actually read
134  *              the on-disk files during a call to wimlib_write() or
135  *              wimlib_overwrite().
136  */
137 static int
138 unix_build_dentry_tree(struct wim_dentry **root_ret,
139                        const mbchar *root_disk_path,
140                        struct wim_lookup_table *lookup_table,
141                        struct wim_security_data *sd,
142                        const struct capture_config *config,
143                        int add_image_flags,
144                        wimlib_progress_func_t progress_func,
145                        void *extra_arg)
146 {
147         struct wim_dentry *root = NULL;
148         int ret = 0;
149         struct wim_inode *inode;
150
151         if (exclude_path(root_disk_path, config, true)) {
152                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) {
153                         ERROR("Cannot exclude the root directory from capture");
154                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
155                         goto out;
156                 }
157                 if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
158                     && progress_func)
159                 {
160                         union wimlib_progress_info info;
161                         info.scan.cur_path = root_disk_path;
162                         info.scan.excluded = true;
163                         progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
164                 }
165                 goto out;
166         }
167
168         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
169             && progress_func)
170         {
171                 union wimlib_progress_info info;
172                 info.scan.cur_path = root_disk_path;
173                 info.scan.excluded = false;
174                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
175         }
176
177         /* UNIX version of capturing a directory tree */
178         struct stat root_stbuf;
179         int (*stat_fn)(const char *restrict, struct stat *restrict);
180         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE)
181                 stat_fn = stat;
182         else
183                 stat_fn = lstat;
184
185         ret = (*stat_fn)(root_disk_path, &root_stbuf);
186         if (ret != 0) {
187                 ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
188                 goto out;
189         }
190
191         if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_ROOT) &&
192               !S_ISDIR(root_stbuf.st_mode))
193         {
194                 /* Do a dereference-stat in case the root is a symbolic link.
195                  * This case is allowed, provided that the symbolic link points
196                  * to a directory. */
197                 ret = stat(root_disk_path, &root_stbuf);
198                 if (ret != 0) {
199                         ERROR_WITH_ERRNO("Failed to stat `%s'", root_disk_path);
200                         ret = WIMLIB_ERR_STAT;
201                         goto out;
202                 }
203                 if (!S_ISDIR(root_stbuf.st_mode)) {
204                         ERROR("`%s' is not a directory", root_disk_path);
205                         ret = WIMLIB_ERR_NOTDIR;
206                         goto out;
207                 }
208         }
209         if (!S_ISREG(root_stbuf.st_mode) && !S_ISDIR(root_stbuf.st_mode)
210             && !S_ISLNK(root_stbuf.st_mode)) {
211                 ERROR("`%s' is not a regular file, directory, or symbolic link.",
212                       root_disk_path);
213                 ret = WIMLIB_ERR_SPECIAL_FILE;
214                 goto out;
215         }
216
217         root = new_dentry_with_timeless_inode(path_basename(root_disk_path));
218         if (!root) {
219                 if (errno == EILSEQ)
220                         ret = WIMLIB_ERR_INVALID_UTF8_STRING;
221                 else if (errno == ENOMEM)
222                         ret = WIMLIB_ERR_NOMEM;
223                 else
224                         ret = WIMLIB_ERR_ICONV_NOT_AVAILABLE;
225                 goto out;
226         }
227
228         inode = root->d_inode;
229
230 #ifdef HAVE_STAT_NANOSECOND_PRECISION
231         inode->i_creation_time = timespec_to_wim_timestamp(root_stbuf.st_mtim);
232         inode->i_last_write_time = timespec_to_wim_timestamp(root_stbuf.st_mtim);
233         inode->i_last_access_time = timespec_to_wim_timestamp(root_stbuf.st_atim);
234 #else
235         inode->i_creation_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
236         inode->i_last_write_time = unix_timestamp_to_wim(root_stbuf.st_mtime);
237         inode->i_last_access_time = unix_timestamp_to_wim(root_stbuf.st_atime);
238 #endif
239         /* Leave the inode number at 0 for directories. */
240         if (!S_ISDIR(root_stbuf.st_mode)) {
241                 if (sizeof(ino_t) >= 8)
242                         inode->i_ino = (u64)root_stbuf.st_ino;
243                 else
244                         inode->i_ino = (u64)root_stbuf.st_ino |
245                                            ((u64)root_stbuf.st_dev <<
246                                                 ((sizeof(ino_t) * 8) & 63));
247         }
248         inode->i_resolved = 1;
249         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
250                 ret = inode_set_unix_data(inode, root_stbuf.st_uid,
251                                           root_stbuf.st_gid,
252                                           root_stbuf.st_mode,
253                                           lookup_table,
254                                           UNIX_DATA_ALL | UNIX_DATA_CREATE);
255                 if (ret)
256                         goto out;
257         }
258         add_image_flags &= ~(WIMLIB_ADD_IMAGE_FLAG_ROOT | WIMLIB_ADD_IMAGE_FLAG_SOURCE);
259         if (S_ISREG(root_stbuf.st_mode)) { /* Archiving a regular file */
260
261                 struct wim_lookup_table_entry *lte;
262                 u8 hash[SHA1_HASH_SIZE];
263
264                 inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
265
266                 /* Empty files do not have to have a lookup table entry. */
267                 if (root_stbuf.st_size == 0)
268                         goto out;
269
270                 /* For each regular file, we must check to see if the file is in
271                  * the lookup table already; if it is, we increment its refcnt;
272                  * otherwise, we create a new lookup table entry and insert it.
273                  * */
274
275                 ret = sha1sum(root_disk_path, hash);
276                 if (ret != 0)
277                         goto out;
278
279                 lte = __lookup_resource(lookup_table, hash);
280                 if (lte) {
281                         lte->refcnt++;
282                         DEBUG("Add lte reference %u for `%s'", lte->refcnt,
283                               root_disk_path);
284                 } else {
285                         mbchar *file_on_disk = STRDUP(root_disk_path);
286                         if (!file_on_disk) {
287                                 ERROR("Failed to allocate memory for file path");
288                                 ret = WIMLIB_ERR_NOMEM;
289                                 goto out;
290                         }
291                         lte = new_lookup_table_entry();
292                         if (!lte) {
293                                 FREE(file_on_disk);
294                                 ret = WIMLIB_ERR_NOMEM;
295                                 goto out;
296                         }
297                         lte->file_on_disk = file_on_disk;
298                         lte->resource_location = RESOURCE_IN_FILE_ON_DISK;
299                         lte->resource_entry.original_size = root_stbuf.st_size;
300                         lte->resource_entry.size = root_stbuf.st_size;
301                         copy_hash(lte->hash, hash);
302                         lookup_table_insert(lookup_table, lte);
303                 }
304                 root->d_inode->i_lte = lte;
305         } else if (S_ISDIR(root_stbuf.st_mode)) { /* Archiving a directory */
306
307                 inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
308
309                 DIR *dir;
310                 struct dirent entry, *result;
311                 struct wim_dentry *child;
312
313                 dir = opendir(root_disk_path);
314                 if (!dir) {
315                         ERROR_WITH_ERRNO("Failed to open the directory `%s'",
316                                          root_disk_path);
317                         ret = WIMLIB_ERR_OPEN;
318                         goto out;
319                 }
320
321                 /* Buffer for names of files in directory. */
322                 size_t len = strlen(root_disk_path);
323                 mbchar name[len + 1 + FILENAME_MAX + 1];
324                 memcpy(name, root_disk_path, len);
325                 name[len] = '/';
326
327                 /* Create a dentry for each entry in the directory on disk, and recurse
328                  * to any subdirectories. */
329                 while (1) {
330                         errno = 0;
331                         ret = readdir_r(dir, &entry, &result);
332                         if (ret != 0) {
333                                 ret = WIMLIB_ERR_READ;
334                                 ERROR_WITH_ERRNO("Error reading the "
335                                                  "directory `%s'",
336                                                  root_disk_path);
337                                 break;
338                         }
339                         if (result == NULL)
340                                 break;
341                         if (result->d_name[0] == '.' && (result->d_name[1] == '\0'
342                               || (result->d_name[1] == '.' && result->d_name[2] == '\0')))
343                                         continue;
344                         strcpy(name + len + 1, result->d_name);
345                         ret = unix_build_dentry_tree(&child, name,
346                                                      lookup_table,
347                                                      NULL, config,
348                                                      add_image_flags,
349                                                      progress_func, NULL);
350                         if (ret != 0)
351                                 break;
352                         if (child)
353                                 dentry_add_child(root, child);
354                 }
355                 closedir(dir);
356         } else { /* Archiving a symbolic link */
357                 inode->i_attributes = FILE_ATTRIBUTE_REPARSE_POINT;
358                 inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK;
359
360                 /* The idea here is to call readlink() to get the UNIX target of
361                  * the symbolic link, then turn the target into a reparse point
362                  * data buffer that contains a relative or absolute symbolic
363                  * link (NOT a junction point or *full* path symbolic link with
364                  * drive letter).
365                  */
366
367                 mbchar deref_name_buf[4096];
368                 ssize_t deref_name_len;
369
370                 deref_name_len = readlink(root_disk_path, deref_name_buf,
371                                           sizeof(deref_name_buf) - 1);
372                 if (deref_name_len >= 0) {
373                         deref_name_buf[deref_name_len] = '\0';
374                         DEBUG("Read symlink `%s'", deref_name_buf);
375                         ret = inode_set_symlink(root->d_inode, deref_name_buf,
376                                                 lookup_table, NULL);
377                         if (ret == 0) {
378                                 /*
379                                  * Unfortunately, Windows seems to have the
380                                  * concept of "file" symbolic links as being
381                                  * different from "directory" symbolic links...
382                                  * so FILE_ATTRIBUTE_DIRECTORY needs to be set
383                                  * on the symbolic link if the *target* of the
384                                  * symbolic link is a directory.
385                                  */
386                                 struct stat stbuf;
387                                 if (stat(root_disk_path, &stbuf) == 0 &&
388                                     S_ISDIR(stbuf.st_mode))
389                                 {
390                                         inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
391                                 }
392                         }
393                 } else {
394                         ERROR_WITH_ERRNO("Failed to read target of "
395                                          "symbolic link `%s'", root_disk_path);
396                         ret = WIMLIB_ERR_READLINK;
397                 }
398         }
399 out:
400         if (ret == 0)
401                 *root_ret = root;
402         else
403                 free_dentry_tree(root, lookup_table);
404         return ret;
405 }
406 #endif /* !__WIN32__ */
407
/* Section of the capture configuration file that a pattern line belongs to.
 * NONE is the state before any section header has been read. */
enum pattern_type {
        NONE                       = 0,
        EXCLUSION_LIST             = 1,
        EXCLUSION_EXCEPTION        = 2,
        COMPRESSION_EXCLUSION_LIST = 3,
        ALIGNMENT_LIST             = 4,
};
415
416 #define COMPAT_DEFAULT_CONFIG
417
418 /* Default capture configuration file when none is specified. */
419 static const mbchar *default_config =
420 #ifdef COMPAT_DEFAULT_CONFIG /* XXX: This policy is being moved to library
421                                 users.  The next ABI-incompatible library
422                                 version will default to the empty string here. */
423 "[ExclusionList]\n"
424 "\\$ntfs.log\n"
425 "\\hiberfil.sys\n"
426 "\\pagefile.sys\n"
427 "\\System Volume Information\n"
428 "\\RECYCLER\n"
429 "\\Windows\\CSC\n"
430 "\n"
431 "[CompressionExclusionList]\n"
432 "*.mp3\n"
433 "*.zip\n"
434 "*.cab\n"
435 "\\WINDOWS\\inf\\*.pnf\n";
436 #else
437 "";
438 #endif
439
440 static void
441 destroy_pattern_list(struct pattern_list *list)
442 {
443         FREE(list->pats);
444 }
445
446 static void
447 destroy_capture_config(struct capture_config *config)
448 {
449         destroy_pattern_list(&config->exclusion_list);
450         destroy_pattern_list(&config->exclusion_exception);
451         destroy_pattern_list(&config->compression_exclusion_list);
452         destroy_pattern_list(&config->alignment_list);
453         FREE(config->config_str);
454         FREE(config->prefix);
455         memset(config, 0, sizeof(*config));
456 }
457
458 static int
459 pattern_list_add_pattern(struct pattern_list *list, const mbchar *pattern)
460 {
461         const char **pats;
462         if (list->num_pats >= list->num_allocated_pats) {
463                 pats = REALLOC(list->pats,
464                                sizeof(list->pats[0]) * (list->num_allocated_pats + 8));
465                 if (!pats)
466                         return WIMLIB_ERR_NOMEM;
467                 list->num_allocated_pats += 8;
468                 list->pats = pats;
469         }
470         list->pats[list->num_pats++] = pattern;
471         return 0;
472 }
473
474 /* Parses the contents of the image capture configuration file and fills in a
475  * `struct capture_config'. */
476 static int
477 init_capture_config(struct capture_config *config,
478                     const mbchar *_config_str, size_t config_len)
479 {
480         mbchar *config_str;
481         mbchar *p;
482         mbchar *eol;
483         mbchar *next_p;
484         size_t bytes_remaining;
485         enum pattern_type type = NONE;
486         int ret;
487         unsigned long line_no = 0;
488
489         DEBUG("config_len = %zu", config_len);
490         bytes_remaining = config_len;
491         memset(config, 0, sizeof(*config));
492         config_str = MALLOC(config_len);
493         if (!config_str) {
494                 ERROR("Could not duplicate capture config string");
495                 return WIMLIB_ERR_NOMEM;
496         }
497
498         memcpy(config_str, _config_str, config_len);
499         next_p = config_str;
500         config->config_str = config_str;
501         while (bytes_remaining) {
502                 line_no++;
503                 p = next_p;
504                 eol = memchr(p, '\n', bytes_remaining);
505                 if (!eol) {
506                         ERROR("Expected end-of-line in capture config file on "
507                               "line %lu", line_no);
508                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
509                         goto out_destroy;
510                 }
511
512                 next_p = eol + 1;
513                 bytes_remaining -= (next_p - p);
514                 if (eol == p)
515                         continue;
516
517                 if (*(eol - 1) == '\r')
518                         eol--;
519                 *eol = '\0';
520
521                 /* Translate backslash to forward slash */
522                 for (mbchar *pp = p; pp != eol; pp++)
523                         if (*pp == '\\')
524                                 *pp = '/';
525
526                 /* Remove drive letter */
527                 if (eol - p > 2 && isalpha(*p) && *(p + 1) == ':')
528                         p += 2;
529
530                 ret = 0;
531                 if (strcmp(p, "[ExclusionList]") == 0)
532                         type = EXCLUSION_LIST;
533                 else if (strcmp(p, "[ExclusionException]") == 0)
534                         type = EXCLUSION_EXCEPTION;
535                 else if (strcmp(p, "[CompressionExclusionList]") == 0)
536                         type = COMPRESSION_EXCLUSION_LIST;
537                 else if (strcmp(p, "[AlignmentList]") == 0)
538                         type = ALIGNMENT_LIST;
539                 else if (p[0] == '[' && strrchr(p, ']')) {
540                         ERROR("Unknown capture configuration section `%s'", p);
541                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
542                 } else switch (type) {
543                 case EXCLUSION_LIST:
544                         DEBUG("Adding pattern \"%s\" to exclusion list", p);
545                         ret = pattern_list_add_pattern(&config->exclusion_list, p);
546                         break;
547                 case EXCLUSION_EXCEPTION:
548                         DEBUG("Adding pattern \"%s\" to exclusion exception list", p);
549                         ret = pattern_list_add_pattern(&config->exclusion_exception, p);
550                         break;
551                 case COMPRESSION_EXCLUSION_LIST:
552                         DEBUG("Adding pattern \"%s\" to compression exclusion list", p);
553                         ret = pattern_list_add_pattern(&config->compression_exclusion_list, p);
554                         break;
555                 case ALIGNMENT_LIST:
556                         DEBUG("Adding pattern \"%s\" to alignment list", p);
557                         ret = pattern_list_add_pattern(&config->alignment_list, p);
558                         break;
559                 default:
560                         ERROR("Line %lu of capture configuration is not "
561                               "in a block (such as [ExclusionList])",
562                               line_no);
563                         ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
564                         break;
565                 }
566                 if (ret != 0)
567                         goto out_destroy;
568         }
569         return 0;
570 out_destroy:
571         destroy_capture_config(config);
572         return ret;
573 }
574
575 static int capture_config_set_prefix(struct capture_config *config,
576                                      const mbchar *_prefix)
577 {
578         mbchar *prefix = STRDUP(_prefix);
579
580         if (!prefix)
581                 return WIMLIB_ERR_NOMEM;
582         FREE(config->prefix);
583         config->prefix = prefix;
584         config->prefix_len = strlen(prefix);
585         return 0;
586 }
587
588 static bool match_pattern(const mbchar *path,
589                           const mbchar *path_basename,
590                           const struct pattern_list *list)
591 {
592         for (size_t i = 0; i < list->num_pats; i++) {
593                 const char *pat = list->pats[i];
594                 const char *string;
595                 if (pat[0] == '/')
596                         /* Absolute path from root of capture */
597                         string = path;
598                 else {
599                         if (strchr(pat, '/'))
600                                 /* Relative path from root of capture */
601                                 string = path + 1;
602                         else
603                                 /* A file name pattern */
604                                 string = path_basename;
605                 }
606
607                 /* Warning: on Windows native builds, fnmatch() calls the
608                  * replacement function in win32.c. */
609                 if (fnmatch(pat, string, FNM_PATHNAME
610                                 #ifdef FNM_CASEFOLD
611                                         | FNM_CASEFOLD
612                                 #endif
613                             ) == 0)
614                 {
615                         DEBUG("`%s' matches the pattern \"%s\"",
616                               string, pat);
617                         return true;
618                 }
619         }
620         return false;
621 }
622
623 /* Return true if the image capture configuration file indicates we should
624  * exclude the filename @path from capture.
625  *
626  * If @exclude_prefix is %true, the part of the path up and including the name
627  * of the directory being captured is not included in the path for matching
628  * purposes.  This allows, for example, a pattern like /hiberfil.sys to match a
629  * file /mnt/windows7/hiberfil.sys if we are capturing the /mnt/windows7
630  * directory.
631  */
632 bool
633 exclude_path(const mbchar *path, const struct capture_config *config,
634              bool exclude_prefix)
635 {
636         const mbchar *basename = path_basename(path);
637         if (exclude_prefix) {
638                 wimlib_assert(strlen(path) >= config->prefix_len);
639                 if (memcmp(config->prefix, path, config->prefix_len) == 0
640                      && path[config->prefix_len] == '/')
641                         path += config->prefix_len;
642         }
643         return match_pattern(path, basename, &config->exclusion_list) &&
644                 !match_pattern(path, basename, &config->exclusion_exception);
645
646 }
647
/* Strip leading and trailing forward slashes from a string.
 *
 * Leading slashes are skipped by advancing the returned pointer; trailing
 * slashes are overwritten with null bytes in place.  A NULL argument yields
 * the empty string literal. */
static const char *
canonicalize_target_path(char *target_path)
{
        char *end;

        if (target_path == NULL)
                return "";

        /* Step over any run of leading slashes. */
        target_path += strspn(target_path, "/");
        if (*target_path == '\0')
                return target_path;

        /* The first character is now a non-slash, so this loop stops before
         * it can run off the front of the string. */
        end = target_path + strlen(target_path);
        while (end[-1] == '/')
                *--end = '\0';
        return target_path;
}
670
671 #ifdef __WIN32__
/* Replaces every backslash in the null-terminated string @s with a forward
 * slash, in place. */
static void
zap_backslashes(char *s)
{
        for (char *hit = strchr(s, '\\'); hit != NULL; hit = strchr(hit + 1, '\\'))
                *hit = '/';
}
681 #endif
682
683 /* Strip leading and trailing slashes from the target paths */
684 static void
685 canonicalize_targets(struct wimlib_capture_source *sources, size_t num_sources)
686 {
687         while (num_sources--) {
688                 DEBUG("Canonicalizing { source: \"%s\", target=\"%s\"}",
689                       sources->fs_source_path,
690                       sources->wim_target_path);
691 #ifdef __WIN32__
692                 /* The Windows API can handle forward slashes.  Just get rid of
693                  * backslashes to avoid confusing other parts of the library
694                  * code. */
695                 zap_backslashes(sources->fs_source_path);
696                 if (sources->wim_target_path)
697                         zap_backslashes(sources->wim_target_path);
698 #endif
699                 sources->wim_target_path =
700                         (char*)canonicalize_target_path(sources->wim_target_path);
701                 DEBUG("Canonical target: \"%s\"", sources->wim_target_path);
702                 sources++;
703         }
704 }
705
706 static int
707 capture_source_cmp(const void *p1, const void *p2)
708 {
709         const struct wimlib_capture_source *s1 = p1, *s2 = p2;
710         return strcmp(s1->wim_target_path, s2->wim_target_path);
711 }
712
713 /* Sorts the capture sources lexicographically by target path.  This occurs
714  * after leading and trailing forward slashes are stripped.
715  *
716  * One purpose of this is to make sure that target paths that are inside other
717  * target paths are added after the containing target paths. */
718 static void
719 sort_sources(struct wimlib_capture_source *sources, size_t num_sources)
720 {
721         qsort(sources, num_sources, sizeof(sources[0]), capture_source_cmp);
722 }
723
724 static int
725 check_sorted_sources(struct wimlib_capture_source *sources, size_t num_sources,
726                      int add_image_flags)
727 {
728         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
729                 if (num_sources != 1) {
730                         ERROR("Must specify exactly 1 capture source "
731                               "(the NTFS volume) in NTFS mode!");
732                         return WIMLIB_ERR_INVALID_PARAM;
733                 }
734                 if (sources[0].wim_target_path[0] != '\0') {
735                         ERROR("In NTFS capture mode the target path inside "
736                               "the image must be the root directory!");
737                         return WIMLIB_ERR_INVALID_PARAM;
738                 }
739         } else if (num_sources != 0) {
740                 /* This code is disabled because the current code
741                  * unconditionally attempts to do overlays.  So, duplicate
742                  * target paths are OK. */
743         #if 0
744                 if (num_sources > 1 && sources[0].wim_target_path[0] == '\0') {
745                         ERROR("Cannot specify root target when using multiple "
746                               "capture sources!");
747                         return WIMLIB_ERR_INVALID_PARAM;
748                 }
749                 for (size_t i = 0; i < num_sources - 1; i++) {
750                         size_t len = strlen(sources[i].wim_target_path);
751                         size_t j = i + 1;
752                         const char *target1 = sources[i].wim_target_path;
753                         do {
754                                 const char *target2 = sources[j].wim_target_path;
755                                 DEBUG("target1=%s, target2=%s",
756                                       target1,target2);
757                                 if (strncmp(target1, target2, len) ||
758                                     target2[len] > '/')
759                                         break;
760                                 if (target2[len] == '/') {
761                                         ERROR("Invalid target `%s': is a prefix of `%s'",
762                                               target1, target2);
763                                         return WIMLIB_ERR_INVALID_PARAM;
764                                 }
765                                 if (target2[len] == '\0') {
766                                         ERROR("Invalid target `%s': is a duplicate of `%s'",
767                                               target1, target2);
768                                         return WIMLIB_ERR_INVALID_PARAM;
769                                 }
770                         } while (++j != num_sources);
771                 }
772         #endif
773         }
774         return 0;
775
776 }
777
778 /* Creates a new directory to place in the WIM image.  This is to create parent
779  * directories that are not part of any target as needed.  */
780 static struct wim_dentry *
781 new_filler_directory(const mbchar *name)
782 {
783         struct wim_dentry *dentry;
784         DEBUG("Creating filler directory \"%s\"", name);
785         dentry = new_dentry_with_inode(name);
786         if (dentry) {
787                 /* Leave the inode number as 0 for now.  The final inode number
788                  * will be assigned later by assign_inode_numbers(). */
789                 dentry->d_inode->i_resolved = 1;
790                 dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
791         }
792         return dentry;
793 }
794
795 /* Transfers the children of @branch to @target.  It is an error if @target is
796  * not a directory or if both @branch and @target contain a child dentry with
797  * the same name. */
798 static int
799 do_overlay(struct wim_dentry *target, struct wim_dentry *branch)
800 {
801         struct rb_root *rb_root;
802
803         DEBUG("Doing overlay \"%W\" => \"%W\"",
804               branch->file_name, target->file_name);
805
806         if (!dentry_is_directory(target)) {
807                 ERROR("Cannot overlay directory \"%W\" over non-directory",
808                       branch->file_name);
809                 return WIMLIB_ERR_INVALID_OVERLAY;
810         }
811
812         rb_root = &branch->d_inode->i_children;
813         while (rb_root->rb_node) { /* While @branch has children... */
814                 struct wim_dentry *child = rbnode_dentry(rb_root->rb_node);
815                 /* Move @child to the directory @target */
816                 unlink_dentry(child);
817                 if (!dentry_add_child(target, child)) {
818                         /* Revert the change to avoid leaking the directory tree
819                          * rooted at @child */
820                         dentry_add_child(branch, child);
821                         ERROR("Overlay error: file \"%W\" already exists "
822                               "as a child of \"%W\"",
823                               child->file_name, target->file_name);
824                         return WIMLIB_ERR_INVALID_OVERLAY;
825                 }
826         }
827         free_dentry(branch);
828         return 0;
829
830 }
831
832 /* Attach or overlay a branch onto the WIM image.
833  *
834  * @root_p:
835  *      Pointer to the root of the WIM image, or pointer to NULL if it has not
836  *      been created yet.
837  * @branch
838  *      Branch to add.
839  * @target_path:
840  *      Path in the WIM image to add the branch, with leading and trailing
841  *      slashes stripped.
842  */
843 static int
844 attach_branch(struct wim_dentry **root_p, struct wim_dentry *branch,
845               mbchar *target_path)
846 {
847         char *slash;
848         struct wim_dentry *dentry, *parent, *target;
849
850         DEBUG("Attaching branch \"%W\" => \"%s\"",
851               branch->file_name, target_path);
852
853         if (*target_path == '\0') {
854                 /* Target: root directory */
855                 if (*root_p) {
856                         /* Overlay on existing root */
857                         return do_overlay(*root_p, branch);
858                 } else  {
859                         /* Set as root */
860                         *root_p = branch;
861                         return 0;
862                 }
863         }
864
865         /* Adding a non-root branch.  Create root if it hasn't been created
866          * already. */
867         if (!*root_p) {
868                 *root_p = new_filler_directory("");
869                 if (!*root_p)
870                         return WIMLIB_ERR_NOMEM;
871         }
872
873         /* Walk the path to the branch, creating filler directories as needed.
874          * */
875         parent = *root_p;
876         while ((slash = strchr(target_path, '/'))) {
877                 *slash = '\0';
878                 dentry = get_dentry_child_with_name(parent, target_path);
879                 if (!dentry) {
880                         dentry = new_filler_directory(target_path);
881                         if (!dentry)
882                                 return WIMLIB_ERR_NOMEM;
883                         dentry_add_child(parent, dentry);
884                 }
885                 parent = dentry;
886                 target_path = slash;
887                 /* Skip over slashes.  Note: this cannot overrun the length of
888                  * the string because the last character cannot be a slash, as
889                  * trailing slashes were tripped.  */
890                 do {
891                         ++target_path;
892                 } while (*target_path == '/');
893         }
894
895         /* If the target path already existed, overlay the branch onto it.
896          * Otherwise, set the branch as the target path. */
897         target = get_dentry_child_with_utf16le_name(parent, branch->file_name);
898         if (target) {
899                 return do_overlay(target, branch);
900         } else {
901                 dentry_add_child(parent, branch);
902                 return 0;
903         }
904 }
905
906 WIMLIBAPI int
907 wimlib_add_image_multisource(WIMStruct *w,
908                              struct wimlib_capture_source *sources,
909                              size_t num_sources,
910                              const utf8char *name,
911                              const mbchar *config_str,
912                              size_t config_len,
913                              int add_image_flags,
914                              wimlib_progress_func_t progress_func)
915 {
916         int (*capture_tree)(struct wim_dentry **,
917                             const mbchar *,
918                             struct wim_lookup_table *,
919                             struct wim_security_data *,
920                             const struct capture_config *,
921                             int,
922                             wimlib_progress_func_t,
923                             void *);
924         void *extra_arg;
925         struct wim_dentry *root_dentry;
926         struct wim_dentry *branch;
927         struct wim_security_data *sd;
928         struct capture_config config;
929         struct wim_image_metadata *imd;
930         int ret;
931
932         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) {
933 #ifdef WITH_NTFS_3G
934                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
935                         ERROR("Cannot dereference files when capturing directly from NTFS");
936                         return WIMLIB_ERR_INVALID_PARAM;
937                 }
938                 if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
939                         ERROR("Capturing UNIX owner and mode not supported "
940                               "when capturing directly from NTFS");
941                         return WIMLIB_ERR_INVALID_PARAM;
942                 }
943                 capture_tree = build_dentry_tree_ntfs;
944                 extra_arg = &w->ntfs_vol;
945 #else
946                 ERROR("wimlib was compiled without support for NTFS-3g, so\n"
947                       "        cannot capture a WIM image directly from a NTFS volume!");
948                 return WIMLIB_ERR_UNSUPPORTED;
949 #endif
950         } else {
951         #ifdef __WIN32__
952                 capture_tree = win32_build_dentry_tree;
953         #else
954                 capture_tree = unix_build_dentry_tree;
955         #endif
956                 extra_arg = NULL;
957         }
958
959 #ifdef __WIN32__
960         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_UNIX_DATA) {
961                 ERROR("Capturing UNIX-specific data is not supported on Windows");
962                 return WIMLIB_ERR_INVALID_PARAM;
963         }
964         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE) {
965                 ERROR("Dereferencing symbolic links is not supported on Windows");
966                 return WIMLIB_ERR_INVALID_PARAM;
967         }
968 #endif
969
970         if (!name || !*name) {
971                 ERROR("Must specify a non-empty string for the image name");
972                 return WIMLIB_ERR_INVALID_PARAM;
973         }
974
975         if (w->hdr.total_parts != 1) {
976                 ERROR("Cannot add an image to a split WIM");
977                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
978         }
979
980         if (wimlib_image_name_in_use(w, name)) {
981                 ERROR("There is already an image named \"%s\" in `%s'",
982                       name, w->filename);
983                 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
984         }
985
986         if (!config_str) {
987                 DEBUG("Using default capture configuration");
988                 config_str = default_config;
989                 config_len = strlen(default_config);
990         }
991         ret = init_capture_config(&config, config_str, config_len);
992         if (ret)
993                 goto out;
994
995         DEBUG("Allocating security data");
996         sd = CALLOC(1, sizeof(struct wim_security_data));
997         if (!sd) {
998                 ret = WIMLIB_ERR_NOMEM;
999                 goto out_destroy_capture_config;
1000         }
1001         sd->total_length = 8;
1002         sd->refcnt = 1;
1003
1004         DEBUG("Using %zu capture sources", num_sources);
1005         canonicalize_targets(sources, num_sources);
1006         sort_sources(sources, num_sources);
1007         ret = check_sorted_sources(sources, num_sources, add_image_flags);
1008         if (ret) {
1009                 ret = WIMLIB_ERR_INVALID_PARAM;
1010                 goto out_free_security_data;
1011         }
1012
1013         DEBUG("Building dentry tree.");
1014         if (num_sources == 0) {
1015                 root_dentry = new_filler_directory("");
1016                 if (!root_dentry) {
1017                         ret = WIMLIB_ERR_NOMEM;
1018                         goto out_free_security_data;
1019                 }
1020         } else {
1021                 size_t i;
1022
1023         #ifdef __WIN32__
1024                 win32_acquire_capture_privileges();
1025         #endif
1026
1027                 root_dentry = NULL;
1028                 i = 0;
1029                 do {
1030                         int flags;
1031                         union wimlib_progress_info progress;
1032
1033                         DEBUG("Building dentry tree for source %zu of %zu "
1034                               "(\"%s\" => \"%s\")", i + 1, num_sources,
1035                               sources[i].fs_source_path,
1036                               sources[i].wim_target_path);
1037                         if (progress_func) {
1038                                 memset(&progress, 0, sizeof(progress));
1039                                 progress.scan.source = sources[i].fs_source_path;
1040                                 progress.scan.wim_target_path = sources[i].wim_target_path;
1041                                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_BEGIN, &progress);
1042                         }
1043                         ret = capture_config_set_prefix(&config,
1044                                                         sources[i].fs_source_path);
1045                         if (ret)
1046                                 goto out_free_dentry_tree;
1047                         flags = add_image_flags | WIMLIB_ADD_IMAGE_FLAG_SOURCE;
1048                         if (!*sources[i].wim_target_path)
1049                                 flags |= WIMLIB_ADD_IMAGE_FLAG_ROOT;
1050                         ret = (*capture_tree)(&branch, sources[i].fs_source_path,
1051                                               w->lookup_table, sd,
1052                                               &config,
1053                                               flags,
1054                                               progress_func, extra_arg);
1055                         if (ret) {
1056                                 ERROR("Failed to build dentry tree for `%s'",
1057                                       sources[i].fs_source_path);
1058                                 goto out_free_dentry_tree;
1059                         }
1060                         if (branch) {
1061                                 /* Use the target name, not the source name, for
1062                                  * the root of each branch from a capture
1063                                  * source.  (This will also set the root dentry
1064                                  * of the entire image to be unnamed.) */
1065                                 ret = set_dentry_name(branch,
1066                                                       path_basename(sources[i].wim_target_path));
1067                                 if (ret)
1068                                         goto out_free_branch;
1069
1070                                 ret = attach_branch(&root_dentry, branch,
1071                                                     sources[i].wim_target_path);
1072                                 if (ret)
1073                                         goto out_free_branch;
1074                         }
1075                         if (progress_func)
1076                                 progress_func(WIMLIB_PROGRESS_MSG_SCAN_END, &progress);
1077                 } while (++i != num_sources);
1078         }
1079
1080         DEBUG("Calculating full paths of dentries.");
1081         ret = for_dentry_in_tree(root_dentry, calculate_dentry_full_path, NULL);
1082         if (ret)
1083                 goto out_free_dentry_tree;
1084
1085         ret = add_new_dentry_tree(w, root_dentry, sd);
1086         if (ret)
1087                 goto out_free_dentry_tree;
1088
1089         imd = &w->image_metadata[w->hdr.image_count - 1];
1090
1091         ret = dentry_tree_fix_inodes(root_dentry, &imd->inode_list);
1092         if (ret)
1093                 goto out_destroy_imd;
1094
1095         DEBUG("Assigning hard link group IDs");
1096         assign_inode_numbers(&imd->inode_list);
1097
1098         ret = xml_add_image(w, name);
1099         if (ret)
1100                 goto out_destroy_imd;
1101
1102         if (add_image_flags & WIMLIB_ADD_IMAGE_FLAG_BOOT)
1103                 wimlib_set_boot_idx(w, w->hdr.image_count);
1104         ret = 0;
1105         goto out_destroy_capture_config;
1106 out_destroy_imd:
1107         destroy_image_metadata(&w->image_metadata[w->hdr.image_count - 1],
1108                                w->lookup_table);
1109         w->hdr.image_count--;
1110         goto out;
1111 out_free_branch:
1112         free_dentry_tree(branch, w->lookup_table);
1113 out_free_dentry_tree:
1114         free_dentry_tree(root_dentry, w->lookup_table);
1115 out_free_security_data:
1116         free_security_data(sd);
1117 out_destroy_capture_config:
1118         destroy_capture_config(&config);
1119 out:
1120 #ifdef __WIN32__
1121         win32_release_capture_privileges();
1122 #endif
1123         return ret;
1124 }
1125
1126 WIMLIBAPI int
1127 wimlib_add_image(WIMStruct *w,
1128                  const mbchar *source,
1129                  const utf8char *name,
1130                  const mbchar *config_str,
1131                  size_t config_len,
1132                  int add_image_flags,
1133                  wimlib_progress_func_t progress_func)
1134 {
1135         if (!source || !*source)
1136                 return WIMLIB_ERR_INVALID_PARAM;
1137
1138         char *fs_source_path = STRDUP(source);
1139         int ret;
1140         struct wimlib_capture_source capture_src = {
1141                 .fs_source_path = fs_source_path,
1142                 .wim_target_path = NULL,
1143                 .reserved = 0,
1144         };
1145         ret = wimlib_add_image_multisource(w, &capture_src, 1, name,
1146                                            config_str, config_len,
1147                                            add_image_flags, progress_func);
1148         FREE(fs_source_path);
1149         return ret;
1150 }