Stream and blob updates
[wimlib] / src / unix_capture.c
1 /*
2  * unix_capture.c:  Capture a directory tree on UNIX.
3  */
4
5 /*
6  * Copyright (C) 2012, 2013, 2014 Eric Biggers
7  *
8  * This file is free software; you can redistribute it and/or modify it under
9  * the terms of the GNU Lesser General Public License as published by the Free
10  * Software Foundation; either version 3 of the License, or (at your option) any
11  * later version.
12  *
13  * This file is distributed in the hope that it will be useful, but WITHOUT
14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
16  * details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this file; if not, see http://www.gnu.org/licenses/.
20  */
21
22 #ifndef __WIN32__
23
24 #ifdef HAVE_CONFIG_H
25 #  include "config.h"
26 #endif
27
28 #include <dirent.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <limits.h> /* for PATH_MAX */
32 #include <sys/stat.h>
33 #include <unistd.h>
34
35 #include "wimlib/blob_table.h"
36 #include "wimlib/capture.h"
37 #include "wimlib/dentry.h"
38 #include "wimlib/error.h"
39 #include "wimlib/reparse.h"
40 #include "wimlib/timestamp.h"
41 #include "wimlib/unix_data.h"
42
43 #ifdef HAVE_FDOPENDIR
44 #  define my_fdopendir(dirfd_p) fdopendir(*(dirfd_p))
45 #else
/*
 * Fallback used when fdopendir() is unavailable: obtain a directory stream for
 * the directory open on *dirfd_p by temporarily changing into it and calling
 * opendir(".").
 *
 * On success the caller's descriptor is closed and *dirfd_p is replaced with
 * the descriptor backing the returned stream.  On failure NULL is returned and
 * *dirfd_p is left as it was.  The previous working directory is restored on a
 * best-effort basis: the result of the restoring fchdir() is not checked.
 */
static DIR *
my_fdopendir(int *dirfd_p)
{
        DIR *stream;
        int saved_cwd = open(".", O_RDONLY);

        if (saved_cwd < 0)
                return NULL;

        if (fchdir(*dirfd_p) != 0) {
                close(saved_cwd);
                return NULL;
        }

        stream = opendir(".");
        if (stream != NULL) {
                /* The stream now owns its own descriptor; hand that one back
                 * to the caller in place of the original.  */
                close(*dirfd_p);
                *dirfd_p = dirfd(stream);
        }

        fchdir(saved_cwd);
        close(saved_cwd);
        return stream;
}
66 #endif
67
/*
 * my_openat(): open @relpath relative to the directory descriptor @dirfd when
 * openat() is available.  Without openat(), @dirfd and @relpath are ignored
 * and @full_path is opened with plain open() instead.
 */
#ifndef HAVE_OPENAT
#  define my_openat(full_path, dirfd, relpath, flags) \
                open((full_path), (flags))
#else
#  define my_openat(full_path, dirfd, relpath, flags) \
                openat((dirfd), (relpath), (flags))
#endif
75
/*
 * my_readlinkat(): read the target of the symbolic link @relpath relative to
 * the directory descriptor @dirfd when readlinkat() is available.  Without
 * readlinkat(), @dirfd and @relpath are ignored and readlink() is applied to
 * @full_path instead.
 */
#ifndef HAVE_READLINKAT
#  define my_readlinkat(full_path, dirfd, relpath, buf, bufsize) \
                readlink((full_path), (buf), (bufsize))
#else
#  define my_readlinkat(full_path, dirfd, relpath, buf, bufsize) \
                readlinkat((dirfd), (relpath), (buf), (bufsize))
#endif
83
/*
 * my_fstatat(): stat @relpath relative to the directory descriptor @dirfd when
 * fstatat() is available.  Without fstatat(), @dirfd and @relpath are ignored
 * and @full_path is examined with lstat() if @flags contains
 * AT_SYMLINK_NOFOLLOW, otherwise with stat().
 *
 * The fallback expansion is fully parenthesized so that the conditional
 * operator cannot absorb operators written after the macro invocation, e.g.
 * `my_fstatat(...) == 0 && ...`.
 */
#ifdef HAVE_FSTATAT
#  define my_fstatat(full_path, dirfd, relpath, stbuf, flags)   \
        fstatat((dirfd), (relpath), (stbuf), (flags))
#else
#  define my_fstatat(full_path, dirfd, relpath, stbuf, flags)   \
        (((flags) & AT_SYMLINK_NOFOLLOW) ? \
                lstat((full_path), (stbuf)) : \
                stat((full_path), (stbuf)))
#endif
93
/* Fallback values for the *at() constants, used only when the headers
 * included above have not already defined them.  */
#if !defined(AT_FDCWD)
#  define AT_FDCWD      -100
#endif

#if !defined(AT_SYMLINK_NOFOLLOW)
#  define AT_SYMLINK_NOFOLLOW   0x100
#endif
101
102 static int
103 unix_scan_regular_file(const char *path, u64 size, struct wim_inode *inode,
104                        struct list_head *unhashed_blobs)
105 {
106         struct blob_descriptor *blob;
107         struct wim_inode_stream *strm;
108
109         inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
110
111         if (size) {
112                 char *file_on_disk = STRDUP(path);
113                 if (!file_on_disk)
114                         return WIMLIB_ERR_NOMEM;
115                 blob = new_blob_descriptor();
116                 if (!blob) {
117                         FREE(file_on_disk);
118                         return WIMLIB_ERR_NOMEM;
119                 }
120                 blob->file_on_disk = file_on_disk;
121                 blob->file_inode = inode;
122                 blob->blob_location = BLOB_IN_FILE_ON_DISK;
123                 blob->size = size;
124         } else {
125                 blob = NULL;
126         }
127
128         strm = inode_add_stream(inode, STREAM_TYPE_DATA, NO_STREAM_NAME, blob);
129         if (!strm) {
130                 free_blob_descriptor(blob);
131                 return WIMLIB_ERR_NOMEM;
132         }
133         prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs);
134         return 0;
135 }
136
/* Forward declaration: unix_scan_directory() recurses into each directory
 * entry through this function, whose definition appears later in the file.  */
static int
unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
                                 char *full_path, size_t full_path_len,
                                 int dirfd, const char *relpath,
                                 struct capture_params *params);
142
143 static int
144 unix_scan_directory(struct wim_dentry *dir_dentry,
145                     char *full_path, size_t full_path_len,
146                     int parent_dirfd, const char *dir_relpath,
147                     struct capture_params *params)
148 {
149
150         int dirfd;
151         DIR *dir;
152         int ret;
153
154         dirfd = my_openat(full_path, parent_dirfd, dir_relpath, O_RDONLY);
155         if (dirfd < 0) {
156                 ERROR_WITH_ERRNO("\"%s\": Can't open directory", full_path);
157                 return WIMLIB_ERR_OPENDIR;
158         }
159
160         dir_dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
161         dir = my_fdopendir(&dirfd);
162         if (!dir) {
163                 ERROR_WITH_ERRNO("\"%s\": Can't open directory", full_path);
164                 close(dirfd);
165                 return WIMLIB_ERR_OPENDIR;
166         }
167
168         ret = 0;
169         for (;;) {
170                 struct dirent *entry;
171                 struct wim_dentry *child;
172                 size_t name_len;
173
174                 errno = 0;
175                 entry = readdir(dir);
176                 if (!entry) {
177                         if (errno) {
178                                 ret = WIMLIB_ERR_READ;
179                                 ERROR_WITH_ERRNO("\"%s\": Error reading directory",
180                                                  full_path);
181                         }
182                         break;
183                 }
184
185                 if (entry->d_name[0] == '.' &&
186                     (entry->d_name[1] == '\0' ||
187                      (entry->d_name[1] == '.' && entry->d_name[2] == '\0')))
188                         continue;
189
190                 full_path[full_path_len] = '/';
191                 name_len = strlen(entry->d_name);
192                 memcpy(&full_path[full_path_len + 1], entry->d_name, name_len + 1);
193                 ret = unix_build_dentry_tree_recursive(&child,
194                                                        full_path,
195                                                        full_path_len + 1 + name_len,
196                                                        dirfd,
197                                                        &full_path[full_path_len + 1],
198                                                        params);
199                 full_path[full_path_len] = '\0';
200                 if (ret)
201                         break;
202                 if (child)
203                         dentry_add_child(dir_dentry, child);
204         }
205         closedir(dir);
206         return ret;
207 }
208
209 /* Given an absolute symbolic link target @dest (UNIX-style, beginning
210  * with '/'), determine whether it points into the directory specified by
211  * @ino and @dev.  If so, return the target modified to be "absolute"
212  * relative to this directory.  Otherwise, return NULL.  */
213 static char *
214 unix_fixup_abslink(char *dest, u64 ino, u64 dev)
215 {
216         char *p = dest;
217
218         do {
219                 char save;
220                 struct stat stbuf;
221                 int ret;
222
223                 /* Skip non-slashes.  */
224                 while (*p && *p != '/')
225                         p++;
226
227                 /* Skip slashes.  */
228                 while (*p && *p == '/')
229                         p++;
230
231                 /* Get inode and device for this prefix.  */
232                 save = *p;
233                 *p = '\0';
234                 ret = stat(dest, &stbuf);
235                 *p = save;
236
237                 if (ret) {
238                         /* stat() failed.  Assume the link points outside the
239                          * directory tree being captured.  */
240                         break;
241                 }
242
243                 if (stbuf.st_ino == ino && stbuf.st_dev == dev) {
244                         /* Link points inside directory tree being captured.
245                          * Return abbreviated path.  */
246                         *--p = '/';
247                         while (p > dest && *(p - 1) == '/')
248                                 p--;
249                         return p;
250                 }
251         } while (*p);
252
253         /* Link does not point inside directory tree being captured.  */
254         return NULL;
255 }
256
257 static int
258 unix_scan_symlink(const char *full_path, int dirfd, const char *relpath,
259                   struct wim_inode *inode, struct capture_params *params)
260 {
261         char deref_name_buf[4096];
262         ssize_t deref_name_len;
263         char *dest;
264         int ret;
265
266         inode->i_attributes = FILE_ATTRIBUTE_REPARSE_POINT;
267         inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK;
268
269         /* The idea here is to call readlink() to get the UNIX target of the
270          * symbolic link, then turn the target into a reparse point data buffer
271          * that contains a relative or absolute symbolic link. */
272         deref_name_len = my_readlinkat(full_path, dirfd, relpath,
273                                        deref_name_buf, sizeof(deref_name_buf) - 1);
274         if (deref_name_len < 0) {
275                 ERROR_WITH_ERRNO("\"%s\": Can't read target of symbolic link",
276                                  full_path);
277                 return WIMLIB_ERR_READLINK;
278         }
279
280         dest = deref_name_buf;
281
282         dest[deref_name_len] = '\0';
283
284         if ((params->add_flags & WIMLIB_ADD_FLAG_RPFIX) &&
285              dest[0] == '/')
286         {
287                 char *fixed_dest;
288
289                 /* RPFIX (reparse point fixup) mode:  Change target of absolute
290                  * symbolic link to be "absolute" relative to the tree being
291                  * captured.  */
292                 fixed_dest = unix_fixup_abslink(dest,
293                                                 params->capture_root_ino,
294                                                 params->capture_root_dev);
295                 params->progress.scan.cur_path = full_path;
296                 params->progress.scan.symlink_target = deref_name_buf;
297                 if (fixed_dest) {
298                         /* Link points inside the tree being captured, so it was
299                          * fixed.  */
300                         inode->i_not_rpfixed = 0;
301                         dest = fixed_dest;
302                         ret = do_capture_progress(params,
303                                                   WIMLIB_SCAN_DENTRY_FIXED_SYMLINK,
304                                                   NULL);
305                 } else {
306                         /* Link points outside the tree being captured, so it
307                          * was not fixed.  */
308                         ret = do_capture_progress(params,
309                                                   WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK,
310                                                   NULL);
311                 }
312                 if (ret)
313                         return ret;
314         }
315         ret = wim_inode_set_symlink(inode, dest, params->blob_table);
316         if (ret)
317                 return ret;
318
319         /* Unfortunately, Windows seems to have the concept of "file" symbolic
320          * links as being different from "directory" symbolic links...  so
321          * FILE_ATTRIBUTE_DIRECTORY needs to be set on the symbolic link if the
322          * *target* of the symbolic link is a directory.  */
323         struct stat stbuf;
324         if (my_fstatat(full_path, dirfd, relpath, &stbuf, 0) == 0 &&
325             S_ISDIR(stbuf.st_mode))
326                 inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
327         return 0;
328 }
329
330 static int
331 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
332                                  char *full_path, size_t full_path_len,
333                                  int dirfd, const char *relpath,
334                                  struct capture_params *params)
335 {
336         struct wim_dentry *tree = NULL;
337         struct wim_inode *inode = NULL;
338         int ret;
339         struct stat stbuf;
340         int stat_flags;
341
342         ret = try_exclude(full_path, full_path_len, params);
343         if (ret < 0) /* Excluded? */
344                 goto out_progress;
345         if (ret > 0) /* Error? */
346                 goto out;
347
348         if (params->add_flags & (WIMLIB_ADD_FLAG_DEREFERENCE |
349                                  WIMLIB_ADD_FLAG_ROOT))
350                 stat_flags = 0;
351         else
352                 stat_flags = AT_SYMLINK_NOFOLLOW;
353
354         ret = my_fstatat(full_path, dirfd, relpath, &stbuf, stat_flags);
355
356         if (ret) {
357                 ERROR_WITH_ERRNO("\"%s\": Can't read metadata", full_path);
358                 ret = WIMLIB_ERR_STAT;
359                 goto out;
360         }
361
362         if (!(params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA)) {
363                 if (unlikely(!S_ISREG(stbuf.st_mode) &&
364                              !S_ISDIR(stbuf.st_mode) &&
365                              !S_ISLNK(stbuf.st_mode)))
366                 {
367                         if (params->add_flags &
368                             WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE)
369                         {
370                                 ERROR("\"%s\": File type is unsupported",
371                                       full_path);
372                                 ret = WIMLIB_ERR_UNSUPPORTED_FILE;
373                                 goto out;
374                         }
375                         params->progress.scan.cur_path = full_path;
376                         ret = do_capture_progress(params,
377                                                   WIMLIB_SCAN_DENTRY_UNSUPPORTED,
378                                                   NULL);
379                         goto out;
380                 }
381         }
382
383         ret = inode_table_new_dentry(params->inode_table, relpath,
384                                      stbuf.st_ino, stbuf.st_dev,
385                                      S_ISDIR(stbuf.st_mode), &tree);
386         if (ret)
387                 goto out;
388
389         inode = tree->d_inode;
390
391         /* Already seen this inode?  */
392         if (inode->i_nlink > 1)
393                 goto out_progress;
394
395 #ifdef HAVE_STAT_NANOSECOND_PRECISION
396         inode->i_creation_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
397         inode->i_last_write_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
398         inode->i_last_access_time = timespec_to_wim_timestamp(&stbuf.st_atim);
399 #else
400         inode->i_creation_time = time_t_to_wim_timestamp(stbuf.st_mtime);
401         inode->i_last_write_time = time_t_to_wim_timestamp(stbuf.st_mtime);
402         inode->i_last_access_time = time_t_to_wim_timestamp(stbuf.st_atime);
403 #endif
404         if (params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA) {
405                 struct wimlib_unix_data unix_data;
406
407                 unix_data.uid = stbuf.st_uid;
408                 unix_data.gid = stbuf.st_gid;
409                 unix_data.mode = stbuf.st_mode;
410                 unix_data.rdev = stbuf.st_rdev;
411                 if (!inode_set_unix_data(inode, &unix_data, UNIX_DATA_ALL)) {
412                         ret = WIMLIB_ERR_NOMEM;
413                         goto out;
414                 }
415         }
416
417         if (params->add_flags & WIMLIB_ADD_FLAG_ROOT) {
418                 params->capture_root_ino = stbuf.st_ino;
419                 params->capture_root_dev = stbuf.st_dev;
420                 params->add_flags &= ~WIMLIB_ADD_FLAG_ROOT;
421         }
422
423         if (S_ISREG(stbuf.st_mode)) {
424                 ret = unix_scan_regular_file(full_path, stbuf.st_size,
425                                              inode, params->unhashed_blobs);
426         } else if (S_ISDIR(stbuf.st_mode)) {
427                 ret = unix_scan_directory(tree, full_path, full_path_len,
428                                           dirfd, relpath, params);
429         } else if (S_ISLNK(stbuf.st_mode)) {
430                 ret = unix_scan_symlink(full_path, dirfd, relpath,
431                                         inode, params);
432         }
433
434         if (ret)
435                 goto out;
436
437 out_progress:
438         params->progress.scan.cur_path = full_path;
439         if (likely(tree))
440                 ret = do_capture_progress(params, WIMLIB_SCAN_DENTRY_OK, inode);
441         else
442                 ret = do_capture_progress(params, WIMLIB_SCAN_DENTRY_EXCLUDED, NULL);
443 out:
444         if (unlikely(ret)) {
445                 free_dentry_tree(tree, params->blob_table);
446                 tree = NULL;
447                 ret = report_capture_error(params, ret, full_path);
448         }
449         *tree_ret = tree;
450         return ret;
451 }
452
453 /*
454  * unix_build_dentry_tree():
455  *      Builds a tree of WIM dentries from an on-disk directory tree (UNIX
456  *      version; no NTFS-specific data is captured).
457  *
458  * @root_ret:   Place to return a pointer to the root of the dentry tree.  Only
459  *              modified if successful.  Set to NULL if the file or directory was
460  *              excluded from capture.
461  *
462  * @root_disk_path:  The path to the root of the directory tree on disk.
463  *
464  * @params:     See doc for `struct capture_params'.
465  *
466  * @return:     0 on success, nonzero on failure.  It is a failure if any of
467  *              the files cannot be `stat'ed, or if any of the needed
468  *              directories cannot be opened or read.  Failure to add the files
469  *              to the WIM may still occur later when trying to actually read
470  *              the on-disk files during a call to wimlib_write() or
471  *              wimlib_overwrite().
472  */
473 int
474 unix_build_dentry_tree(struct wim_dentry **root_ret,
475                        const char *root_disk_path,
476                        struct capture_params *params)
477 {
478         size_t path_len;
479         size_t path_bufsz;
480         char *path_buf;
481         int ret;
482
483         path_len = strlen(root_disk_path);
484         path_bufsz = min(32790, PATH_MAX + 1);
485
486         if (path_len >= path_bufsz)
487                 return WIMLIB_ERR_INVALID_PARAM;
488
489         path_buf = MALLOC(path_bufsz);
490         if (!path_buf)
491                 return WIMLIB_ERR_NOMEM;
492         memcpy(path_buf, root_disk_path, path_len + 1);
493
494         params->capture_root_nchars = path_len;
495
496         ret = unix_build_dentry_tree_recursive(root_ret, path_buf, path_len,
497                                                AT_FDCWD, path_buf, params);
498         FREE(path_buf);
499         return ret;
500 }
501
502 #endif /* !__WIN32__ */