]> wimlib.net Git - wimlib/blob - src/unix_capture.c
Improve random number generation
[wimlib] / src / unix_capture.c
1 /*
2  * unix_capture.c:  Capture a directory tree on UNIX.
3  */
4
5 /*
6  * Copyright (C) 2012-2016 Eric Biggers
7  *
8  * This file is free software; you can redistribute it and/or modify it under
9  * the terms of the GNU Lesser General Public License as published by the Free
10  * Software Foundation; either version 3 of the License, or (at your option) any
11  * later version.
12  *
13  * This file is distributed in the hope that it will be useful, but WITHOUT
14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
16  * details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this file; if not, see http://www.gnu.org/licenses/.
20  */
21
22 #ifndef __WIN32__
23
24 #ifdef HAVE_CONFIG_H
25 #  include "config.h"
26 #endif
27
28 #include <dirent.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <limits.h> /* for PATH_MAX */
32 #include <sys/stat.h>
33 #include <unistd.h>
34
35 #include "wimlib/blob_table.h"
36 #include "wimlib/dentry.h"
37 #include "wimlib/error.h"
38 #include "wimlib/reparse.h"
39 #include "wimlib/scan.h"
40 #include "wimlib/timestamp.h"
41 #include "wimlib/unix_data.h"
42
#ifdef HAVE_FDOPENDIR
#  define my_fdopendir(dirfd_p) fdopendir(*(dirfd_p))
#else
/*
 * Fallback for systems without fdopendir().
 *
 * Opens a directory stream for the directory referred to by *@dirfd_p by
 * temporarily changing the working directory to it, calling opendir("."), and
 * then changing back to the previous working directory.
 *
 * On success, the original descriptor is closed and *@dirfd_p is replaced with
 * the descriptor backing the returned stream.  On failure, NULL is returned and
 * *@dirfd_p is left untouched, so the caller still owns it.
 */
static DIR *
my_fdopendir(int *dirfd_p)
{
        DIR *stream = NULL;
        int saved_cwd_fd;

        /* Remember where we are so that we can come back afterwards.  */
        saved_cwd_fd = open(".", O_RDONLY);
        if (saved_cwd_fd < 0)
                return NULL;

        if (fchdir(*dirfd_p) == 0) {
                stream = opendir(".");
                if (stream != NULL) {
                        /* The stream has its own descriptor; hand that one
                         * back to the caller in place of the original.  */
                        close(*dirfd_p);
                        *dirfd_p = dirfd(stream);
                }
                /* Restore the previous working directory.  */
                fchdir(saved_cwd_fd);
        }

        close(saved_cwd_fd);
        return stream;
}
#endif
67
/*
 * my_openat(): open a file given both its full path and its path relative to
 * an open directory.  Uses openat() on (dirfd, relpath) when available;
 * otherwise falls back to open() on the full path, in which case the dirfd and
 * relpath arguments are not evaluated.
 */
#if defined(HAVE_OPENAT)
#  define my_openat(full_path, dirfd, relpath, flags)   \
        openat((dirfd), (relpath), (flags))
#else
#  define my_openat(full_path, dirfd, relpath, flags)   \
        open((full_path), (flags))
#endif
75
/*
 * my_readlinkat(): read a symbolic link target given both the link's full path
 * and its path relative to an open directory.  Uses readlinkat() on
 * (dirfd, relpath) when available; otherwise falls back to readlink() on the
 * full path, in which case the dirfd and relpath arguments are not evaluated.
 */
#if defined(HAVE_READLINKAT)
#  define my_readlinkat(full_path, dirfd, relpath, buf, bufsize)        \
        readlinkat((dirfd), (relpath), (buf), (bufsize))
#else
#  define my_readlinkat(full_path, dirfd, relpath, buf, bufsize)        \
        readlink((full_path), (buf), (bufsize))
#endif
83
/*
 * my_fstatat(): stat a file given both its full path and its path relative to
 * an open directory.  Uses fstatat() on (dirfd, relpath) when available;
 * otherwise falls back to lstat() or stat() on the full path, chosen by whether
 * AT_SYMLINK_NOFOLLOW is set in @flags.
 *
 * The fallback expansion is fully parenthesized so that the macro behaves like
 * a function call inside a larger expression.  Without the outer parentheses,
 * `my_fstatat(...) == 0' would parse as `cond ? lstat(...) : (stat(...) == 0)'.
 */
#ifdef HAVE_FSTATAT
#  define my_fstatat(full_path, dirfd, relpath, stbuf, flags)   \
        fstatat((dirfd), (relpath), (stbuf), (flags))
#else
#  define my_fstatat(full_path, dirfd, relpath, stbuf, flags)   \
        (((flags) & AT_SYMLINK_NOFOLLOW) ?                      \
                lstat((full_path), (stbuf)) :                   \
                stat((full_path), (stbuf)))
#endif

/*
 * Fallback definitions for systems whose headers do not provide these
 * constants.  They are only consumed by the my_*at() wrappers above.
 */
#ifndef AT_FDCWD
#  define AT_FDCWD      (-100)
#endif

#ifndef AT_SYMLINK_NOFOLLOW
#  define AT_SYMLINK_NOFOLLOW   0x100
#endif
101
102 static int
103 unix_scan_regular_file(const char *path, u64 blocks, u64 size,
104                        struct wim_inode *inode,
105                        struct list_head *unhashed_blobs)
106 {
107         struct blob_descriptor *blob = NULL;
108         struct wim_inode_stream *strm;
109
110         /*
111          * Set FILE_ATTRIBUTE_SPARSE_FILE if the file uses less disk space than
112          * expected given its size.
113          */
114         if (blocks < DIV_ROUND_UP(size, 512))
115                 inode->i_attributes = FILE_ATTRIBUTE_SPARSE_FILE;
116         else
117                 inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
118
119         if (size) {
120                 blob = new_blob_descriptor();
121                 if (unlikely(!blob))
122                         goto err_nomem;
123                 blob->file_on_disk = STRDUP(path);
124                 if (unlikely(!blob->file_on_disk))
125                         goto err_nomem;
126                 blob->blob_location = BLOB_IN_FILE_ON_DISK;
127                 blob->size = size;
128                 blob->file_inode = inode;
129         }
130
131         strm = inode_add_stream(inode, STREAM_TYPE_DATA, NO_STREAM_NAME, blob);
132         if (unlikely(!strm))
133                 goto err_nomem;
134
135         prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs);
136         return 0;
137
138 err_nomem:
139         free_blob_descriptor(blob);
140         return WIMLIB_ERR_NOMEM;
141 }
142
143 static int
144 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
145                                  char *path, size_t path_len,
146                                  int dirfd, const char *relpath,
147                                  struct scan_params *params);
148
149 static int
150 unix_scan_directory(struct wim_dentry *dir_dentry,
151                     char *full_path, size_t full_path_len,
152                     int parent_dirfd, const char *dir_relpath,
153                     struct scan_params *params)
154 {
155
156         int dirfd;
157         DIR *dir;
158         int ret;
159
160         dirfd = my_openat(full_path, parent_dirfd, dir_relpath, O_RDONLY);
161         if (dirfd < 0) {
162                 ERROR_WITH_ERRNO("\"%s\": Can't open directory", full_path);
163                 return WIMLIB_ERR_OPENDIR;
164         }
165
166         dir_dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
167         dir = my_fdopendir(&dirfd);
168         if (!dir) {
169                 ERROR_WITH_ERRNO("\"%s\": Can't open directory", full_path);
170                 close(dirfd);
171                 return WIMLIB_ERR_OPENDIR;
172         }
173
174         ret = 0;
175         for (;;) {
176                 struct dirent *entry;
177                 struct wim_dentry *child;
178                 size_t name_len;
179
180                 errno = 0;
181                 entry = readdir(dir);
182                 if (!entry) {
183                         if (errno) {
184                                 ret = WIMLIB_ERR_READ;
185                                 ERROR_WITH_ERRNO("\"%s\": Error reading directory",
186                                                  full_path);
187                         }
188                         break;
189                 }
190
191                 name_len = strlen(entry->d_name);
192
193                 if (should_ignore_filename(entry->d_name, name_len))
194                         continue;
195
196                 full_path[full_path_len] = '/';
197                 memcpy(&full_path[full_path_len + 1], entry->d_name, name_len + 1);
198                 ret = unix_build_dentry_tree_recursive(&child,
199                                                        full_path,
200                                                        full_path_len + 1 + name_len,
201                                                        dirfd,
202                                                        &full_path[full_path_len + 1],
203                                                        params);
204                 full_path[full_path_len] = '\0';
205                 if (ret)
206                         break;
207                 attach_scanned_tree(dir_dentry, child, params->blob_table);
208         }
209         closedir(dir);
210         return ret;
211 }
212
213 /*
214  * Given an absolute symbolic link target (UNIX-style, beginning with '/'),
215  * determine whether it points into the directory identified by @ino and @dev.
216  * If yes, return the suffix of @target which is relative to this directory, but
217  * retaining leading slashes.  If no, return @target.
218  *
219  * Here are some examples, assuming that the @ino/@dev directory is "/home/e":
220  *
221  *      Original target         New target
222  *      ---------------         ----------
223  *      /home/e/test            /test
224  *      /home/e/test/           /test/
225  *      //home//e//test//       //test//
226  *      /home/e                                         (empty string)
227  *      /home/e/                /
228  *      /usr/lib                /usr/lib                (external link)
229  *
230  * Because of the possibility of other links into the @ino/@dev directory and/or
231  * multiple path separators, we can't simply do a string comparison; instead we
232  * need to stat() each ancestor directory.
233  *
234  * If the link points directly to the @ino/@dev directory with no trailing
235  * slashes, then the new target will be an empty string.  This is not a valid
236  * UNIX symlink target, but we store this in the archive anyway since the target
237  * is intended to be de-relativized when the link is extracted.
238  */
239 static char *
240 unix_relativize_link_target(char *target, u64 ino, u64 dev)
241 {
242         char *p = target;
243
244         do {
245                 char save;
246                 struct stat stbuf;
247                 int ret;
248
249                 /* Skip slashes (guaranteed to be at least one here)  */
250                 do {
251                         p++;
252                 } while (*p == '/');
253
254                 /* End of string?  */
255                 if (!*p)
256                         break;
257
258                 /* Skip non-slashes (guaranteed to be at least one here)  */
259                 do {
260                         p++;
261                 } while (*p && *p != '/');
262
263                 /* Get the inode and device numbers for this prefix.  */
264                 save = *p;
265                 *p = '\0';
266                 ret = stat(target, &stbuf);
267                 *p = save;
268
269                 if (ret) {
270                         /* stat() failed.  Assume the link points outside the
271                          * directory tree being captured.  */
272                         break;
273                 }
274
275                 if (stbuf.st_ino == ino && stbuf.st_dev == dev) {
276                         /* Link points inside directory tree being captured.
277                          * Return abbreviated path.  */
278                         return p;
279                 }
280         } while (*p);
281
282         /* Link does not point inside directory tree being captured.  */
283         return target;
284 }
285
286 static noinline_for_stack int
287 unix_scan_symlink(const char *full_path, int dirfd, const char *relpath,
288                   struct wim_inode *inode, struct scan_params *params)
289 {
290         char orig_target[REPARSE_POINT_MAX_SIZE];
291         char *target = orig_target;
292         int ret;
293
294         /* Read the UNIX symbolic link target.  */
295         ret = my_readlinkat(full_path, dirfd, relpath, target,
296                             sizeof(orig_target));
297         if (unlikely(ret < 0)) {
298                 ERROR_WITH_ERRNO("\"%s\": Can't read target of symbolic link",
299                                  full_path);
300                 return WIMLIB_ERR_READLINK;
301         }
302         if (unlikely(ret >= sizeof(orig_target))) {
303                 ERROR("\"%s\": target of symbolic link is too long", full_path);
304                 return WIMLIB_ERR_READLINK;
305         }
306         target[ret] = '\0';
307
308         /* If the link is absolute and reparse point fixups are enabled, then
309          * change it to be "absolute" relative to the tree being captured.  */
310         if (target[0] == '/' && (params->add_flags & WIMLIB_ADD_FLAG_RPFIX)) {
311                 int status = WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK;
312
313                 params->progress.scan.cur_path = full_path;
314                 params->progress.scan.symlink_target = target;
315
316                 target = unix_relativize_link_target(target,
317                                                      params->capture_root_ino,
318                                                      params->capture_root_dev);
319                 if (target != orig_target) {
320                         /* Link target was fixed.  */
321                         inode->i_rp_flags &= ~WIM_RP_FLAG_NOT_FIXED;
322                         status = WIMLIB_SCAN_DENTRY_FIXED_SYMLINK;
323                 }
324                 ret = do_scan_progress(params, status, NULL);
325                 if (ret)
326                         return ret;
327         }
328
329         /* Translate the UNIX symlink target into a Windows reparse point.  */
330         ret = wim_inode_set_symlink(inode, target, params->blob_table);
331         if (unlikely(ret)) {
332                 if (ret == WIMLIB_ERR_INVALID_UTF8_STRING) {
333                         ERROR("\"%s\": target of symbolic link is not valid "
334                               "UTF-8.  This is not supported.", full_path);
335                 }
336                 return ret;
337         }
338
339         /* On Windows, a reparse point can be set on both directory and
340          * non-directory files.  Usually, a link that is intended to point to a
341          * (non-)directory is stored as a reparse point on a (non-)directory
342          * file.  Replicate this behavior by examining the target file.  */
343         struct stat stbuf;
344         if (my_fstatat(full_path, dirfd, relpath, &stbuf, 0) == 0 &&
345             S_ISDIR(stbuf.st_mode))
346                 inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
347         return 0;
348 }
349
350 static int
351 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
352                                  char *full_path, size_t full_path_len,
353                                  int dirfd, const char *relpath,
354                                  struct scan_params *params)
355 {
356         struct wim_dentry *tree = NULL;
357         struct wim_inode *inode = NULL;
358         int ret;
359         struct stat stbuf;
360         int stat_flags;
361
362         ret = try_exclude(full_path, params);
363         if (unlikely(ret < 0)) /* Excluded? */
364                 goto out_progress;
365         if (unlikely(ret > 0)) /* Error? */
366                 goto out;
367
368         if (params->add_flags & (WIMLIB_ADD_FLAG_DEREFERENCE |
369                                  WIMLIB_ADD_FLAG_ROOT))
370                 stat_flags = 0;
371         else
372                 stat_flags = AT_SYMLINK_NOFOLLOW;
373
374         ret = my_fstatat(full_path, dirfd, relpath, &stbuf, stat_flags);
375
376         if (ret) {
377                 ERROR_WITH_ERRNO("\"%s\": Can't read metadata", full_path);
378                 ret = WIMLIB_ERR_STAT;
379                 goto out;
380         }
381
382         if (!(params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA)) {
383                 if (unlikely(!S_ISREG(stbuf.st_mode) &&
384                              !S_ISDIR(stbuf.st_mode) &&
385                              !S_ISLNK(stbuf.st_mode)))
386                 {
387                         if (params->add_flags &
388                             WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE)
389                         {
390                                 ERROR("\"%s\": File type is unsupported",
391                                       full_path);
392                                 ret = WIMLIB_ERR_UNSUPPORTED_FILE;
393                                 goto out;
394                         }
395                         params->progress.scan.cur_path = full_path;
396                         ret = do_scan_progress(params,
397                                                WIMLIB_SCAN_DENTRY_UNSUPPORTED,
398                                                NULL);
399                         goto out;
400                 }
401         }
402
403         ret = inode_table_new_dentry(params->inode_table, relpath,
404                                      stbuf.st_ino, stbuf.st_dev, false, &tree);
405         if (unlikely(ret)) {
406                 if (ret == WIMLIB_ERR_INVALID_UTF8_STRING) {
407                         ERROR("\"%s\": filename is not valid UTF-8.  "
408                               "This is not supported.", full_path);
409                 }
410                 goto out;
411         }
412
413         inode = tree->d_inode;
414
415         /* Already seen this inode?  */
416         if (inode->i_nlink > 1)
417                 goto out_progress;
418
419 #ifdef HAVE_STAT_NANOSECOND_PRECISION
420         inode->i_creation_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
421         inode->i_last_write_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
422         inode->i_last_access_time = timespec_to_wim_timestamp(&stbuf.st_atim);
423 #else
424         inode->i_creation_time = time_t_to_wim_timestamp(stbuf.st_mtime);
425         inode->i_last_write_time = time_t_to_wim_timestamp(stbuf.st_mtime);
426         inode->i_last_access_time = time_t_to_wim_timestamp(stbuf.st_atime);
427 #endif
428         if (params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA) {
429                 struct wimlib_unix_data unix_data;
430
431                 unix_data.uid = stbuf.st_uid;
432                 unix_data.gid = stbuf.st_gid;
433                 unix_data.mode = stbuf.st_mode;
434                 unix_data.rdev = stbuf.st_rdev;
435                 if (!inode_set_unix_data(inode, &unix_data, UNIX_DATA_ALL)) {
436                         ret = WIMLIB_ERR_NOMEM;
437                         goto out;
438                 }
439         }
440
441         if (params->add_flags & WIMLIB_ADD_FLAG_ROOT) {
442                 params->capture_root_ino = stbuf.st_ino;
443                 params->capture_root_dev = stbuf.st_dev;
444                 params->add_flags &= ~WIMLIB_ADD_FLAG_ROOT;
445         }
446
447         if (S_ISREG(stbuf.st_mode)) {
448                 ret = unix_scan_regular_file(full_path, stbuf.st_blocks,
449                                              stbuf.st_size, inode,
450                                              params->unhashed_blobs);
451         } else if (S_ISDIR(stbuf.st_mode)) {
452                 ret = unix_scan_directory(tree, full_path, full_path_len,
453                                           dirfd, relpath, params);
454         } else if (S_ISLNK(stbuf.st_mode)) {
455                 ret = unix_scan_symlink(full_path, dirfd, relpath,
456                                         inode, params);
457         }
458
459         if (ret)
460                 goto out;
461
462 out_progress:
463         params->progress.scan.cur_path = full_path;
464         if (likely(tree))
465                 ret = do_scan_progress(params, WIMLIB_SCAN_DENTRY_OK, inode);
466         else
467                 ret = do_scan_progress(params, WIMLIB_SCAN_DENTRY_EXCLUDED, NULL);
468 out:
469         if (unlikely(ret)) {
470                 free_dentry_tree(tree, params->blob_table);
471                 tree = NULL;
472                 ret = report_scan_error(params, ret, full_path);
473         }
474         *tree_ret = tree;
475         return ret;
476 }
477
478 /*
479  * unix_build_dentry_tree():
480  *      Builds a tree of WIM dentries from an on-disk directory tree (UNIX
481  *      version; no NTFS-specific data is captured).
482  *
483  * @root_ret:   Place to return a pointer to the root of the dentry tree.  Set
484  *              to NULL if the file or directory was excluded from capture.
485  *
486  * @root_disk_path:  The path to the root of the directory tree on disk.
487  *
488  * @params:     See doc for `struct scan_params'.
489  *
490  * @return:     0 on success, nonzero on failure.  It is a failure if any of
491  *              the files cannot be `stat'ed, or if any of the needed
492  *              directories cannot be opened or read.  Failure to add the files
493  *              to the WIM may still occur later when trying to actually read
494  *              the on-disk files during a call to wimlib_write() or
495  *              wimlib_overwrite().
496  */
497 int
498 unix_build_dentry_tree(struct wim_dentry **root_ret,
499                        const char *root_disk_path, struct scan_params *params)
500 {
501         size_t path_len;
502         size_t path_bufsz;
503         char *path_buf;
504         int ret;
505
506         path_len = strlen(root_disk_path);
507         path_bufsz = min(32790, PATH_MAX + 1);
508
509         if (path_len >= path_bufsz)
510                 return WIMLIB_ERR_INVALID_PARAM;
511
512         path_buf = MALLOC(path_bufsz);
513         if (!path_buf)
514                 return WIMLIB_ERR_NOMEM;
515         memcpy(path_buf, root_disk_path, path_len + 1);
516
517         params->capture_root_nchars = path_len;
518
519         ret = unix_build_dentry_tree_recursive(root_ret, path_buf, path_len,
520                                                AT_FDCWD, path_buf, params);
521         FREE(path_buf);
522         return ret;
523 }
524
525 #endif /* !__WIN32__ */