2 * unix_capture.c: Capture a directory tree on UNIX.
6 * Copyright (C) 2012-2016 Eric Biggers
8 * This file is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 3 of the License, or (at your option) any
13 * This file is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this file; if not, see http://www.gnu.org/licenses/.
31 #include <limits.h> /* for PATH_MAX */
35 #include "wimlib/blob_table.h"
36 #include "wimlib/dentry.h"
37 #include "wimlib/error.h"
38 #include "wimlib/reparse.h"
39 #include "wimlib/scan.h"
40 #include "wimlib/timestamp.h"
41 #include "wimlib/unix_data.h"
/* Open a directory stream on the descriptor that @fd_ptr points to, by
 * passing the pointed-to value straight to fdopendir(). */
# define my_fdopendir(fd_ptr) fdopendir(*(fd_ptr))
47 my_fdopendir(int *dirfd_p)
52 old_pwd = open(".", O_RDONLY);
54 if (!fchdir(*dirfd_p)) {
58 *dirfd_p = dirfd(dir);
/* Open @rel relative to the directory descriptor @dfd using openat().
 * The first argument (the full path) is not used by this variant. */
# define my_openat(path, dfd, rel, oflags) \
	openat((dfd), (rel), (oflags))
/* Open the file by its full path using open().  The directory descriptor
 * and relative path arguments are not used by this variant. */
# define my_openat(path, dfd, rel, oflags) \
	open((path), (oflags))
76 #ifdef HAVE_READLINKAT
/* Read the target of the symbolic link @rel, looked up relative to the
 * directory descriptor @dfd, into @out (capacity @outsz) using readlinkat().
 * The first argument (the full path) is not used by this variant. */
# define my_readlinkat(path, dfd, rel, out, outsz) \
	readlinkat((dfd), (rel), (out), (outsz))
/* Read the target of the symbolic link at the full path into @out
 * (capacity @outsz) using readlink().  The directory descriptor and relative
 * path arguments are not used by this variant. */
# define my_readlinkat(path, dfd, rel, out, outsz) \
	readlink((path), (out), (outsz))
/* Stat @rel relative to the directory descriptor @dfd using fstatat(),
 * forwarding @atflags unchanged.  The first argument (the full path) is not
 * used by this variant. */
# define my_fstatat(path, dfd, rel, st, atflags) \
	fstatat((dfd), (rel), (st), (atflags))
/*
 * Path-based variant of my_fstatat(): calls lstat() on @full_path when @flags
 * contains AT_SYMLINK_NOFOLLOW, and stat() on @full_path otherwise.  @dirfd
 * and @relpath are not used.  Evaluates to the return value of whichever
 * call was made.
 *
 * The whole conditional expression is parenthesized so that the macro acts
 * like a single function call inside a larger expression.  Without the outer
 * parentheses, "my_fstatat(...) == 0 && x" would parse as
 * "cond ? lstat(...) : (stat(...) == 0 && x)", dropping the comparison
 * whenever the lstat() branch is taken.
 */
# define my_fstatat(full_path, dirfd, relpath, stbuf, flags) \
	(((flags) & AT_SYMLINK_NOFOLLOW) ? \
		lstat((full_path), (stbuf)) : \
		stat((full_path), (stbuf)))
95 # define AT_FDCWD -100
98 #ifndef AT_SYMLINK_NOFOLLOW
99 # define AT_SYMLINK_NOFOLLOW 0x100
103 unix_scan_regular_file(const char *path, u64 blocks, u64 size,
104 struct wim_inode *inode,
105 struct list_head *unhashed_blobs)
107 struct blob_descriptor *blob = NULL;
108 struct wim_inode_stream *strm;
111 * Set FILE_ATTRIBUTE_SPARSE_FILE if the file uses less disk space than
112 * expected given its size.
114 if (blocks < DIV_ROUND_UP(size, 512))
115 inode->i_attributes = FILE_ATTRIBUTE_SPARSE_FILE;
117 inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
120 blob = new_blob_descriptor();
123 blob->file_on_disk = STRDUP(path);
124 if (unlikely(!blob->file_on_disk))
126 blob->blob_location = BLOB_IN_FILE_ON_DISK;
128 blob->file_inode = inode;
131 strm = inode_add_stream(inode, STREAM_TYPE_DATA, NO_STREAM_NAME, blob);
135 prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs);
139 free_blob_descriptor(blob);
140 return WIMLIB_ERR_NOMEM;
144 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
145 char *path, size_t path_len,
146 int dirfd, const char *relpath,
147 struct scan_params *params);
150 unix_scan_directory(struct wim_dentry *dir_dentry,
151 char *full_path, size_t full_path_len,
152 int parent_dirfd, const char *dir_relpath,
153 struct scan_params *params)
160 dirfd = my_openat(full_path, parent_dirfd, dir_relpath, O_RDONLY);
162 ERROR_WITH_ERRNO("\"%s\": Can't open directory", full_path);
163 return WIMLIB_ERR_OPENDIR;
166 dir_dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
167 dir = my_fdopendir(&dirfd);
169 ERROR_WITH_ERRNO("\"%s\": Can't open directory", full_path);
171 return WIMLIB_ERR_OPENDIR;
176 struct dirent *entry;
177 struct wim_dentry *child;
181 entry = readdir(dir);
184 ret = WIMLIB_ERR_READ;
185 ERROR_WITH_ERRNO("\"%s\": Error reading directory",
191 name_len = strlen(entry->d_name);
193 if (should_ignore_filename(entry->d_name, name_len))
196 full_path[full_path_len] = '/';
197 memcpy(&full_path[full_path_len + 1], entry->d_name, name_len + 1);
198 ret = unix_build_dentry_tree_recursive(&child,
200 full_path_len + 1 + name_len,
202 &full_path[full_path_len + 1],
204 full_path[full_path_len] = '\0';
207 attach_scanned_tree(dir_dentry, child, params->blob_table);
214 * Given an absolute symbolic link target (UNIX-style, beginning with '/'),
215 * determine whether it points into the directory identified by @ino and @dev.
216 * If yes, return the suffix of @target which is relative to this directory, but
217 * retaining leading slashes. If no, return @target.
219 * Here are some examples, assuming that the @ino/@dev directory is "/home/e":
221 * Original target New target
222 * --------------- ----------
224 * /home/e/test/ /test/
225 * //home//e//test// //test//
226 * /home/e (empty string)
228 * /usr/lib /usr/lib (external link)
230 * Because of the possibility of other links into the @ino/@dev directory and/or
231 * multiple path separators, we can't simply do a string comparison; instead we
232 * need to stat() each ancestor directory.
234 * If the link points directly to the @ino/@dev directory with no trailing
235 * slashes, then the new target will be an empty string. This is not a valid
236 * UNIX symlink target, but we store this in the archive anyway since the target
237 * is intended to be de-relativized when the link is extracted.
240 unix_relativize_link_target(char *target, u64 ino, u64 dev)
249 /* Skip slashes (guaranteed to be at least one here) */
258 /* Skip non-slashes (guaranteed to be at least one here) */
261 } while (*p && *p != '/');
263 /* Get the inode and device numbers for this prefix. */
266 ret = stat(target, &stbuf);
270 /* stat() failed. Assume the link points outside the
271 * directory tree being captured. */
275 if (stbuf.st_ino == ino && stbuf.st_dev == dev) {
276 /* Link points inside directory tree being captured.
277 * Return abbreviated path. */
282 /* Link does not point inside directory tree being captured. */
286 static noinline_for_stack int
287 unix_scan_symlink(const char *full_path, int dirfd, const char *relpath,
288 struct wim_inode *inode, struct scan_params *params)
290 char orig_target[REPARSE_POINT_MAX_SIZE];
291 char *target = orig_target;
294 /* Read the UNIX symbolic link target. */
295 ret = my_readlinkat(full_path, dirfd, relpath, target,
296 sizeof(orig_target));
297 if (unlikely(ret < 0)) {
298 ERROR_WITH_ERRNO("\"%s\": Can't read target of symbolic link",
300 return WIMLIB_ERR_READLINK;
302 if (unlikely(ret >= sizeof(orig_target))) {
303 ERROR("\"%s\": target of symbolic link is too long", full_path);
304 return WIMLIB_ERR_READLINK;
308 /* If the link is absolute and reparse point fixups are enabled, then
309 * change it to be "absolute" relative to the tree being captured. */
310 if (target[0] == '/' && (params->add_flags & WIMLIB_ADD_FLAG_RPFIX)) {
311 int status = WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK;
313 params->progress.scan.cur_path = full_path;
314 params->progress.scan.symlink_target = target;
316 target = unix_relativize_link_target(target,
317 params->capture_root_ino,
318 params->capture_root_dev);
319 if (target != orig_target) {
320 /* Link target was fixed. */
321 inode->i_rp_flags &= ~WIM_RP_FLAG_NOT_FIXED;
322 status = WIMLIB_SCAN_DENTRY_FIXED_SYMLINK;
324 ret = do_scan_progress(params, status, NULL);
329 /* Translate the UNIX symlink target into a Windows reparse point. */
330 ret = wim_inode_set_symlink(inode, target, params->blob_table);
332 if (ret == WIMLIB_ERR_INVALID_UTF8_STRING) {
333 ERROR("\"%s\": target of symbolic link is not valid "
334 "UTF-8. This is not supported.", full_path);
339 /* On Windows, a reparse point can be set on both directory and
340 * non-directory files. Usually, a link that is intended to point to a
341 * (non-)directory is stored as a reparse point on a (non-)directory
342 * file. Replicate this behavior by examining the target file. */
344 if (my_fstatat(full_path, dirfd, relpath, &stbuf, 0) == 0 &&
345 S_ISDIR(stbuf.st_mode))
346 inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
351 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
352 char *full_path, size_t full_path_len,
353 int dirfd, const char *relpath,
354 struct scan_params *params)
356 struct wim_dentry *tree = NULL;
357 struct wim_inode *inode = NULL;
362 ret = try_exclude(full_path, params);
363 if (unlikely(ret < 0)) /* Excluded? */
365 if (unlikely(ret > 0)) /* Error? */
368 if (params->add_flags & (WIMLIB_ADD_FLAG_DEREFERENCE |
369 WIMLIB_ADD_FLAG_ROOT))
372 stat_flags = AT_SYMLINK_NOFOLLOW;
374 ret = my_fstatat(full_path, dirfd, relpath, &stbuf, stat_flags);
377 ERROR_WITH_ERRNO("\"%s\": Can't read metadata", full_path);
378 ret = WIMLIB_ERR_STAT;
382 if (!(params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA)) {
383 if (unlikely(!S_ISREG(stbuf.st_mode) &&
384 !S_ISDIR(stbuf.st_mode) &&
385 !S_ISLNK(stbuf.st_mode)))
387 if (params->add_flags &
388 WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE)
390 ERROR("\"%s\": File type is unsupported",
392 ret = WIMLIB_ERR_UNSUPPORTED_FILE;
395 params->progress.scan.cur_path = full_path;
396 ret = do_scan_progress(params,
397 WIMLIB_SCAN_DENTRY_UNSUPPORTED,
403 ret = inode_table_new_dentry(params->inode_table, relpath,
404 stbuf.st_ino, stbuf.st_dev, false, &tree);
406 if (ret == WIMLIB_ERR_INVALID_UTF8_STRING) {
407 ERROR("\"%s\": filename is not valid UTF-8. "
408 "This is not supported.", full_path);
413 inode = tree->d_inode;
415 /* Already seen this inode? */
416 if (inode->i_nlink > 1)
419 #ifdef HAVE_STAT_NANOSECOND_PRECISION
420 inode->i_creation_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
421 inode->i_last_write_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
422 inode->i_last_access_time = timespec_to_wim_timestamp(&stbuf.st_atim);
424 inode->i_creation_time = time_t_to_wim_timestamp(stbuf.st_mtime);
425 inode->i_last_write_time = time_t_to_wim_timestamp(stbuf.st_mtime);
426 inode->i_last_access_time = time_t_to_wim_timestamp(stbuf.st_atime);
428 if (params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA) {
429 struct wimlib_unix_data unix_data;
431 unix_data.uid = stbuf.st_uid;
432 unix_data.gid = stbuf.st_gid;
433 unix_data.mode = stbuf.st_mode;
434 unix_data.rdev = stbuf.st_rdev;
435 if (!inode_set_unix_data(inode, &unix_data, UNIX_DATA_ALL)) {
436 ret = WIMLIB_ERR_NOMEM;
441 if (params->add_flags & WIMLIB_ADD_FLAG_ROOT) {
442 params->capture_root_ino = stbuf.st_ino;
443 params->capture_root_dev = stbuf.st_dev;
444 params->add_flags &= ~WIMLIB_ADD_FLAG_ROOT;
447 if (S_ISREG(stbuf.st_mode)) {
448 ret = unix_scan_regular_file(full_path, stbuf.st_blocks,
449 stbuf.st_size, inode,
450 params->unhashed_blobs);
451 } else if (S_ISDIR(stbuf.st_mode)) {
452 ret = unix_scan_directory(tree, full_path, full_path_len,
453 dirfd, relpath, params);
454 } else if (S_ISLNK(stbuf.st_mode)) {
455 ret = unix_scan_symlink(full_path, dirfd, relpath,
463 params->progress.scan.cur_path = full_path;
465 ret = do_scan_progress(params, WIMLIB_SCAN_DENTRY_OK, inode);
467 ret = do_scan_progress(params, WIMLIB_SCAN_DENTRY_EXCLUDED, NULL);
470 free_dentry_tree(tree, params->blob_table);
472 ret = report_scan_error(params, ret, full_path);
479 * unix_build_dentry_tree():
480 * Builds a tree of WIM dentries from an on-disk directory tree (UNIX
481 * version; no NTFS-specific data is captured).
483 * @root_ret: Place to return a pointer to the root of the dentry tree. Set
484 * to NULL if the file or directory was excluded from capture.
486 * @root_disk_path: The path to the root of the directory tree on disk.
488 * @params: See doc for `struct scan_params'.
490 * @return: 0 on success, nonzero on failure. It is a failure if any of
491 * the files cannot be `stat'ed, or if any of the needed
492 * directories cannot be opened or read. Failure to add the files
493 * to the WIM may still occur later when trying to actually read
494 * the on-disk files during a call to wimlib_write() or
495 * wimlib_overwrite().
498 unix_build_dentry_tree(struct wim_dentry **root_ret,
499 const char *root_disk_path, struct scan_params *params)
506 path_len = strlen(root_disk_path);
507 path_bufsz = min(32790, PATH_MAX + 1);
509 if (path_len >= path_bufsz)
510 return WIMLIB_ERR_INVALID_PARAM;
512 path_buf = MALLOC(path_bufsz);
514 return WIMLIB_ERR_NOMEM;
515 memcpy(path_buf, root_disk_path, path_len + 1);
517 params->capture_root_nchars = path_len;
519 ret = unix_build_dentry_tree_recursive(root_ret, path_buf, path_len,
520 AT_FDCWD, path_buf, params);
525 #endif /* !__WIN32__ */