]> wimlib.net Git - wimlib/blob - src/unix_capture.c
Support for file exclusions via progress function
[wimlib] / src / unix_capture.c
1 /*
2  * unix_capture.c:  Capture a directory tree on UNIX.
3  */
4
5 /*
6  * Copyright (C) 2012, 2013, 2014 Eric Biggers
7  *
8  * This file is part of wimlib, a library for working with WIM files.
9  *
10  * wimlib is free software; you can redistribute it and/or modify it under the
11  * terms of the GNU General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option)
13  * any later version.
14  *
15  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17  * A PARTICULAR PURPOSE. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with wimlib; if not, see http://www.gnu.org/licenses/.
22  */
23
24 #ifndef __WIN32__
25
26 #ifdef HAVE_CONFIG_H
27 #  include "config.h"
28 #endif
29
30 #include <dirent.h>
31 #include <errno.h>
32 #include <fcntl.h>
33 #include <limits.h> /* for PATH_MAX */
34 #include <sys/stat.h>
35 #include <unistd.h>
36
37 #include "wimlib/capture.h"
38 #include "wimlib/dentry.h"
39 #include "wimlib/error.h"
40 #include "wimlib/lookup_table.h"
41 #include "wimlib/reparse.h"
42 #include "wimlib/timestamp.h"
43 #include "wimlib/unix_data.h"
44
#ifdef HAVE_FDOPENDIR
#  define my_fdopendir(dirfd_p) fdopendir(*(dirfd_p))
#else
/*
 * Fallback for systems without fdopendir():  open a directory stream for the
 * directory referred to by the file descriptor *@dirfd_p.
 *
 * This works by temporarily changing the working directory to *@dirfd_p,
 * calling opendir("."), and then changing back to the previous working
 * directory.
 *
 * On success, the descriptor originally in *@dirfd_p is closed, *@dirfd_p is
 * replaced with the descriptor belonging to the returned stream, and the
 * stream is returned.
 *
 * On failure, NULL is returned, *@dirfd_p is left open and unchanged (the
 * caller remains responsible for closing it), and errno describes the first
 * call that failed.  Failure to restore the previous working directory is
 * treated as an error, since callers may rely on paths relative to it.
 */
static DIR *
my_fdopendir(int *dirfd_p)
{
	DIR *dir = NULL;
	int saved_errno;
	int old_pwd;

	old_pwd = open(".", O_RDONLY);
	if (old_pwd < 0)
		return NULL;

	if (fchdir(*dirfd_p) == 0) {
		dir = opendir(".");
		saved_errno = errno;

		if (fchdir(old_pwd) != 0) {
			/* The previous working directory could not be restored.
			 * Report failure rather than continuing silently.  */
			if (dir) {
				saved_errno = errno;
				closedir(dir);
				dir = NULL;
			}
		} else if (dir) {
			/* The stream owns its own descriptor; the one passed in
			 * is no longer needed.  */
			close(*dirfd_p);
			*dirfd_p = dirfd(dir);
		}
	} else {
		saved_errno = errno;
	}

	close(old_pwd);

	/* Don't let the cleanup calls above hide the real error code.  */
	if (!dir)
		errno = saved_errno;
	return dir;
}
#endif
69
/*
 * Wrappers around openat(), readlinkat(), and fstatat().
 *
 * When the corresponding *at() function is available, the wrapper uses
 * (dirfd, relpath) and ignores full_path.  Otherwise it falls back to the
 * classic path-based call on full_path and ignores (dirfd, relpath).
 *
 * Every expansion is a single parenthesized expression, so the wrappers can
 * be used safely inside larger expressions such as
 * "my_fstatat(...) == 0 && ...".
 */
#ifdef HAVE_OPENAT
#  define my_openat(full_path, dirfd, relpath, flags) \
		(openat((dirfd), (relpath), (flags)))
#else
#  define my_openat(full_path, dirfd, relpath, flags) \
		(open((full_path), (flags)))
#endif

#ifdef HAVE_READLINKAT
#  define my_readlinkat(full_path, dirfd, relpath, buf, bufsize) \
		(readlinkat((dirfd), (relpath), (buf), (bufsize)))
#else
#  define my_readlinkat(full_path, dirfd, relpath, buf, bufsize) \
		(readlink((full_path), (buf), (bufsize)))
#endif

#ifdef HAVE_FSTATAT
#  define my_fstatat(full_path, dirfd, relpath, stbuf, flags)	\
	(fstatat((dirfd), (relpath), (stbuf), (flags)))
#else
/* Emulate the only flag used here: AT_SYMLINK_NOFOLLOW selects lstat().  */
#  define my_fstatat(full_path, dirfd, relpath, stbuf, flags)	\
	((((flags) & AT_SYMLINK_NOFOLLOW) != 0) ? \
		lstat((full_path), (stbuf)) : \
		stat((full_path), (stbuf)))
#endif

/* Fallback definitions for systems whose headers lack the AT_* constants.  */
#ifndef AT_FDCWD
#  define AT_FDCWD	(-100)
#endif

#ifndef AT_SYMLINK_NOFOLLOW
#  define AT_SYMLINK_NOFOLLOW	0x100
#endif
103
104 static int
105 unix_scan_regular_file(const char *path, u64 size, struct wim_inode *inode,
106                        struct list_head *unhashed_streams)
107 {
108         struct wim_lookup_table_entry *lte;
109         char *file_on_disk;
110
111         inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
112
113         /* Empty files do not have to have a lookup table entry. */
114         if (!size)
115                 return 0;
116
117         file_on_disk = STRDUP(path);
118         if (!file_on_disk)
119                 return WIMLIB_ERR_NOMEM;
120         lte = new_lookup_table_entry();
121         if (!lte) {
122                 FREE(file_on_disk);
123                 return WIMLIB_ERR_NOMEM;
124         }
125         lte->file_on_disk = file_on_disk;
126         lte->resource_location = RESOURCE_IN_FILE_ON_DISK;
127         lte->size = size;
128         add_unhashed_stream(lte, inode, 0, unhashed_streams);
129         inode->i_lte = lte;
130         return 0;
131 }
132
/* Forward declaration:  unix_scan_directory() calls this function for every
 * directory entry it finds.  The parameter names match the definition.  */
static int
unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
				 char *full_path, size_t full_path_len,
				 int dirfd, const char *relpath,
				 struct add_image_params *params);
138
139 static int
140 unix_scan_directory(struct wim_dentry *dir_dentry,
141                     char *full_path, size_t full_path_len,
142                     int parent_dirfd, const char *dir_relpath,
143                     struct add_image_params *params)
144 {
145
146         int dirfd;
147         DIR *dir;
148         int ret;
149
150         dirfd = my_openat(full_path, parent_dirfd, dir_relpath, O_RDONLY);
151         if (dirfd < 0) {
152                 ERROR_WITH_ERRNO("\"%s\": Can't open directory", full_path);
153                 return WIMLIB_ERR_OPENDIR;
154         }
155
156         dir_dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
157         dir = my_fdopendir(&dirfd);
158         if (!dir) {
159                 ERROR_WITH_ERRNO("\"%s\": Can't open directory", full_path);
160                 close(dirfd);
161                 return WIMLIB_ERR_OPENDIR;
162         }
163
164         ret = 0;
165         for (;;) {
166                 struct dirent *entry;
167                 struct wim_dentry *child;
168                 size_t name_len;
169
170                 errno = 0;
171                 entry = readdir(dir);
172                 if (!entry) {
173                         if (errno) {
174                                 ret = WIMLIB_ERR_READ;
175                                 ERROR_WITH_ERRNO("\"%s\": Error reading directory",
176                                                  full_path);
177                         }
178                         break;
179                 }
180
181                 if (entry->d_name[0] == '.' &&
182                     (entry->d_name[1] == '\0' ||
183                      (entry->d_name[1] == '.' && entry->d_name[2] == '\0')))
184                         continue;
185
186                 full_path[full_path_len] = '/';
187                 name_len = strlen(entry->d_name);
188                 memcpy(&full_path[full_path_len + 1], entry->d_name, name_len + 1);
189                 ret = unix_build_dentry_tree_recursive(&child,
190                                                        full_path,
191                                                        full_path_len + 1 + name_len,
192                                                        dirfd,
193                                                        &full_path[full_path_len + 1],
194                                                        params);
195                 full_path[full_path_len] = '\0';
196                 if (ret)
197                         break;
198                 if (child)
199                         dentry_add_child(dir_dentry, child);
200         }
201         closedir(dir);
202         return ret;
203 }
204
205 /* Given an absolute symbolic link target @dest (UNIX-style, beginning
206  * with '/'), determine whether it points into the directory specified by
207  * @ino and @dev.  If so, return the target modified to be "absolute"
208  * relative to this directory.  Otherwise, return NULL.  */
209 static char *
210 unix_fixup_abslink(char *dest, u64 ino, u64 dev)
211 {
212         char *p = dest;
213
214         do {
215                 char save;
216                 struct stat stbuf;
217                 int ret;
218
219                 /* Skip non-slashes.  */
220                 while (*p && *p != '/')
221                         p++;
222
223                 /* Skip slashes.  */
224                 while (*p && *p == '/')
225                         p++;
226
227                 /* Get inode and device for this prefix.  */
228                 save = *p;
229                 *p = '\0';
230                 ret = stat(dest, &stbuf);
231                 *p = save;
232
233                 if (ret) {
234                         /* stat() failed.  Assume the link points outside the
235                          * directory tree being captured.  */
236                         break;
237                 }
238
239                 if (stbuf.st_ino == ino && stbuf.st_dev == dev) {
240                         /* Link points inside directory tree being captured.
241                          * Return abbreviated path.  */
242                         *--p = '/';
243                         while (p > dest && *(p - 1) == '/')
244                                 p--;
245                         return p;
246                 }
247         } while (*p);
248
249         /* Link does not point inside directory tree being captured.  */
250         return NULL;
251 }
252
253 static int
254 unix_scan_symlink(const char *full_path, int dirfd, const char *relpath,
255                   struct wim_inode *inode, struct add_image_params *params)
256 {
257         char deref_name_buf[4096];
258         ssize_t deref_name_len;
259         char *dest;
260         int ret;
261
262         inode->i_attributes = FILE_ATTRIBUTE_REPARSE_POINT;
263         inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK;
264
265         /* The idea here is to call readlink() to get the UNIX target of the
266          * symbolic link, then turn the target into a reparse point data buffer
267          * that contains a relative or absolute symbolic link. */
268         deref_name_len = my_readlinkat(full_path, dirfd, relpath,
269                                        deref_name_buf, sizeof(deref_name_buf) - 1);
270         if (deref_name_len < 0) {
271                 ERROR_WITH_ERRNO("\"%s\": Can't read target of symbolic link",
272                                  full_path);
273                 return WIMLIB_ERR_READLINK;
274         }
275
276         dest = deref_name_buf;
277
278         dest[deref_name_len] = '\0';
279
280         if ((params->add_flags & WIMLIB_ADD_FLAG_RPFIX) &&
281              dest[0] == '/')
282         {
283                 char *fixed_dest;
284
285                 /* RPFIX (reparse point fixup) mode:  Change target of absolute
286                  * symbolic link to be "absolute" relative to the tree being
287                  * captured.  */
288                 fixed_dest = unix_fixup_abslink(dest,
289                                                 params->capture_root_ino,
290                                                 params->capture_root_dev);
291                 params->progress.scan.cur_path = full_path;
292                 params->progress.scan.symlink_target = deref_name_buf;
293                 if (fixed_dest) {
294                         /* Link points inside the tree being captured, so it was
295                          * fixed.  */
296                         inode->i_not_rpfixed = 0;
297                         dest = fixed_dest;
298                         ret = do_capture_progress(params,
299                                                   WIMLIB_SCAN_DENTRY_FIXED_SYMLINK,
300                                                   NULL);
301                 } else {
302                         /* Link points outside the tree being captured, so it
303                          * was not fixed.  */
304                         ret = do_capture_progress(params,
305                                                   WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK,
306                                                   NULL);
307                 }
308                 if (ret)
309                         return ret;
310         }
311         ret = wim_inode_set_symlink(inode, dest, params->lookup_table);
312         if (ret)
313                 return ret;
314
315         /* Unfortunately, Windows seems to have the concept of "file" symbolic
316          * links as being different from "directory" symbolic links...  so
317          * FILE_ATTRIBUTE_DIRECTORY needs to be set on the symbolic link if the
318          * *target* of the symbolic link is a directory.  */
319         struct stat stbuf;
320         if (my_fstatat(full_path, dirfd, relpath, &stbuf, 0) == 0 &&
321             S_ISDIR(stbuf.st_mode))
322                 inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
323         return 0;
324 }
325
326 static int
327 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
328                                  char *full_path, size_t full_path_len,
329                                  int dirfd, const char *relpath,
330                                  struct add_image_params *params)
331 {
332         struct wim_dentry *tree = NULL;
333         struct wim_inode *inode = NULL;
334         int ret;
335         struct stat stbuf;
336         int stat_flags;
337
338         ret = try_exclude(full_path, full_path_len, params);
339         if (ret < 0) /* Excluded? */
340                 goto out_progress;
341         if (ret > 0) /* Error? */
342                 goto out;
343
344         if (params->add_flags & (WIMLIB_ADD_FLAG_DEREFERENCE |
345                                  WIMLIB_ADD_FLAG_ROOT))
346                 stat_flags = 0;
347         else
348                 stat_flags = AT_SYMLINK_NOFOLLOW;
349
350         ret = my_fstatat(full_path, dirfd, relpath, &stbuf, stat_flags);
351
352         if (ret) {
353                 ERROR_WITH_ERRNO("\"%s\": Can't read metadata", full_path);
354                 ret = WIMLIB_ERR_STAT;
355                 goto out;
356         }
357
358         if (!(params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA)) {
359                 if (unlikely(!S_ISREG(stbuf.st_mode) &&
360                              !S_ISDIR(stbuf.st_mode) &&
361                              !S_ISLNK(stbuf.st_mode)))
362                 {
363                         if (params->add_flags &
364                             WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE)
365                         {
366                                 ERROR("\"%s\": File type is unsupported",
367                                       full_path);
368                                 ret = WIMLIB_ERR_UNSUPPORTED_FILE;
369                                 goto out;
370                         }
371                         params->progress.scan.cur_path = full_path;
372                         ret = do_capture_progress(params,
373                                                   WIMLIB_SCAN_DENTRY_UNSUPPORTED,
374                                                   NULL);
375                         goto out;
376                 }
377         }
378
379         ret = inode_table_new_dentry(params->inode_table, relpath,
380                                      stbuf.st_ino, stbuf.st_dev,
381                                      S_ISDIR(stbuf.st_mode), &tree);
382         if (ret)
383                 goto out;
384
385         inode = tree->d_inode;
386
387         /* Already seen this inode?  */
388         if (inode->i_nlink > 1)
389                 goto out_progress;
390
391 #ifdef HAVE_STAT_NANOSECOND_PRECISION
392         inode->i_creation_time = timespec_to_wim_timestamp(stbuf.st_mtim);
393         inode->i_last_write_time = timespec_to_wim_timestamp(stbuf.st_mtim);
394         inode->i_last_access_time = timespec_to_wim_timestamp(stbuf.st_atim);
395 #else
396         inode->i_creation_time = unix_timestamp_to_wim(stbuf.st_mtime);
397         inode->i_last_write_time = unix_timestamp_to_wim(stbuf.st_mtime);
398         inode->i_last_access_time = unix_timestamp_to_wim(stbuf.st_atime);
399 #endif
400         inode->i_resolved = 1;
401         if (params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA) {
402                 struct wimlib_unix_data unix_data;
403
404                 unix_data.uid = stbuf.st_uid;
405                 unix_data.gid = stbuf.st_gid;
406                 unix_data.mode = stbuf.st_mode;
407                 unix_data.rdev = stbuf.st_rdev;
408                 if (!inode_set_unix_data(inode, &unix_data, UNIX_DATA_ALL)) {
409                         ret = WIMLIB_ERR_NOMEM;
410                         goto out;
411                 }
412         }
413
414         if (params->add_flags & WIMLIB_ADD_FLAG_ROOT) {
415                 params->capture_root_ino = stbuf.st_ino;
416                 params->capture_root_dev = stbuf.st_dev;
417                 params->add_flags &= ~WIMLIB_ADD_FLAG_ROOT;
418         }
419
420         if (S_ISREG(stbuf.st_mode)) {
421                 ret = unix_scan_regular_file(full_path, stbuf.st_size,
422                                              inode, params->unhashed_streams);
423         } else if (S_ISDIR(stbuf.st_mode)) {
424                 ret = unix_scan_directory(tree, full_path, full_path_len,
425                                           dirfd, relpath, params);
426         } else if (S_ISLNK(stbuf.st_mode)) {
427                 ret = unix_scan_symlink(full_path, dirfd, relpath,
428                                         inode, params);
429         }
430
431         if (ret)
432                 goto out;
433
434 out_progress:
435         params->progress.scan.cur_path = full_path;
436         if (likely(tree))
437                 ret = do_capture_progress(params, WIMLIB_SCAN_DENTRY_OK, inode);
438         else
439                 ret = do_capture_progress(params, WIMLIB_SCAN_DENTRY_EXCLUDED, NULL);
440 out:
441         if (likely(ret == 0))
442                 *tree_ret = tree;
443         else
444                 free_dentry_tree(tree, params->lookup_table);
445         return ret;
446 }
447
448 /*
449  * unix_build_dentry_tree():
450  *      Builds a tree of WIM dentries from an on-disk directory tree (UNIX
451  *      version; no NTFS-specific data is captured).
452  *
453  * @root_ret:   Place to return a pointer to the root of the dentry tree.  Only
454  *              modified if successful.  Set to NULL if the file or directory was
455  *              excluded from capture.
456  *
457  * @root_disk_path:  The path to the root of the directory tree on disk.
458  *
459  * @params:     See doc for `struct add_image_params'.
460  *
461  * @return:     0 on success, nonzero on failure.  It is a failure if any of
462  *              the files cannot be `stat'ed, or if any of the needed
463  *              directories cannot be opened or read.  Failure to add the files
464  *              to the WIM may still occur later when trying to actually read
465  *              the on-disk files during a call to wimlib_write() or
466  *              wimlib_overwrite().
467  */
468 int
469 unix_build_dentry_tree(struct wim_dentry **root_ret,
470                        const char *root_disk_path,
471                        struct add_image_params *params)
472 {
473         size_t path_len;
474         size_t path_bufsz;
475         char *path_buf;
476         int ret;
477
478         path_len = strlen(root_disk_path);
479         path_bufsz = min(32790, PATH_MAX + 1);
480
481         if (path_len >= path_bufsz)
482                 return WIMLIB_ERR_INVALID_PARAM;
483
484         path_buf = MALLOC(path_bufsz);
485         if (!path_buf)
486                 return WIMLIB_ERR_NOMEM;
487         memcpy(path_buf, root_disk_path, path_len + 1);
488
489         params->capture_root_nchars = path_len;
490
491         ret = unix_build_dentry_tree_recursive(root_ret, path_buf, path_len,
492                                                AT_FDCWD, path_buf, params);
493         FREE(path_buf);
494         return ret;
495 }
496
497 #endif /* !__WIN32__ */