2 * unix_capture.c: Capture a directory tree on UNIX.
6 * Copyright (C) 2012-2018 Eric Biggers
8 * This file is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 3 of the License, or (at your option) any
13 * This file is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this file; if not, see https://www.gnu.org/licenses/.
32 #include <sys/types.h>
33 #ifdef HAVE_SYS_XATTR_H
34 # include <sys/xattr.h>
38 #include "wimlib/blob_table.h"
39 #include "wimlib/dentry.h"
40 #include "wimlib/error.h"
41 #include "wimlib/reparse.h"
42 #include "wimlib/scan.h"
43 #include "wimlib/timestamp.h"
44 #include "wimlib/unix_data.h"
45 #include "wimlib/xattr.h"
/*
 * my_fdopendir(): turn the directory file descriptor pointed to by @dirfd_p
 * into a directory stream.  This variant forwards directly to fdopendir().
 */
48 # define my_fdopendir(dirfd_p) fdopendir(*(dirfd_p))
/*
 * Function variant of my_fdopendir() that does not call fdopendir().
 * NOTE(review): only part of the body is visible in this excerpt.  The visible
 * statements open the current directory to remember it, fchdir() into
 * *dirfd_p, and store dirfd(dir) back into *dirfd_p, so the descriptor held by
 * the caller may be replaced -- confirm the remaining steps against the full
 * source.
 */
51 my_fdopendir(int *dirfd_p)
56 old_pwd = open(".", O_RDONLY);
58 if (!fchdir(*dirfd_p)) {
62 *dirfd_p = dirfd(dir);
/*
 * my_openat(): open a file given both its full path and its path relative to
 * a directory descriptor.  This variant uses openat() with @dirfd and @relpath
 * and does not use @full_path.
 */
73 # define my_openat(full_path, dirfd, relpath, flags) \
74 openat((dirfd), (relpath), (flags))
/*
 * Alternative my_openat() that uses plain open() on @full_path and does not
 * use @dirfd or @relpath.
 */
76 # define my_openat(full_path, dirfd, relpath, flags) \
77 open((full_path), (flags))
/*
 * my_readlinkat(): read the target of a symbolic link into @buf (at most
 * @bufsize bytes).  When HAVE_READLINKAT is defined this maps to readlinkat()
 * on @dirfd and @relpath, and @full_path is unused.
 */
80 #ifdef HAVE_READLINKAT
81 # define my_readlinkat(full_path, dirfd, relpath, buf, bufsize) \
82 readlinkat((dirfd), (relpath), (buf), (bufsize))
/* Alternative my_readlinkat() that uses readlink() on @full_path instead. */
84 # define my_readlinkat(full_path, dirfd, relpath, buf, bufsize) \
85 readlink((full_path), (buf), (bufsize))
/*
 * my_fstatat(): stat a file given both its full path and its path relative to
 * a directory descriptor.  This variant maps to fstatat() on @dirfd and
 * @relpath, passing @flags through; @full_path is unused.
 */
89 # define my_fstatat(full_path, dirfd, relpath, stbuf, flags) \
90 fstatat((dirfd), (relpath), (stbuf), (flags))
/*
 * Alternative my_fstatat() that does not use fstatat(): @dirfd and @relpath
 * are ignored and @full_path is examined instead, with lstat() when @flags
 * contains AT_SYMLINK_NOFOLLOW and with stat() otherwise.
 *
 * The whole expansion is parenthesized so that the macro behaves like a
 * function call inside larger expressions such as
 * `my_fstatat(...) == 0 && S_ISDIR(...)`.  Without the outer parentheses the
 * comparison would bind only to the stat() arm of the conditional expression.
 */
# define my_fstatat(full_path, dirfd, relpath, stbuf, flags) \
	(((flags) & AT_SYMLINK_NOFOLLOW) ? \
		lstat((full_path), (stbuf)) : \
		stat((full_path), (stbuf)))
/*
 * Local definitions of the AT_* constants used by the my_*at() wrappers.
 * AT_SYMLINK_NOFOLLOW is defined here only when the system headers have not
 * already defined it.  NOTE(review): the guard around AT_FDCWD is not visible
 * in this excerpt -- presumably it is guarded the same way; confirm.
 */
99 # define AT_FDCWD -100
102 #ifndef AT_SYMLINK_NOFOLLOW
103 # define AT_SYMLINK_NOFOLLOW 0x100
106 #ifdef HAVE_LINUX_XATTR_SUPPORT
/*
 * gather_xattr_entries(): walk the NUL-separated name list @names and, for
 * each name, append one struct wim_xattr_entry (header, name, value) to the
 * @entries buffer.  NOTE(review): some lines of the body are not visible in
 * this excerpt; the added comments describe only the visible statements.
 */
108 * Retrieves the values of the xattrs named by the null-terminated @names of the
109 * file at @path and serializes the xattr names and values into @entries. If
110 * successful, returns the number of bytes used in @entries. If unsuccessful,
111 * returns -1 and sets errno (ERANGE if @entries was too small).
114 gather_xattr_entries(const char *path, const char *names, size_t names_size,
115 void *entries, size_t entries_size)
/* One-past-the-end pointers bound both the name list and the output buffer. */
117 const char * const names_end = names + names_size;
118 void * const entries_end = entries + entries_size;
/* @name and @entry are cursors into @names and @entries respectively. */
119 const char *name = names;
120 struct wim_xattr_entry *entry = entries;
/* Bounded length: strnlen() is limited to the bytes left in the name list. */
123 size_t name_len = strnlen(name, names_end - name);
/*
 * Reject an empty name, and a name whose length reaches the end of the list,
 * which means no NUL terminator was found inside the list.
 */
127 if (name_len == 0 || name_len >= names_end - name) {
128 ERROR("\"%s\": malformed extended attribute names list",
134 if (name_len > WIM_XATTR_NAME_MAX) {
135 WARNING("\"%s\": name of extended attribute \"%s\" is too long to store",
141 * Take care to always call lgetxattr() with a nonzero size,
142 * since zero size means to return the value length only.
/*
 * The check uses <= rather than <, so space exactly equal to the header plus
 * the NUL-terminated name is treated as insufficient.
 */
144 if (entries_end - (void *)entry <=
145 sizeof(*entry) + name_len + 1) {
150 entry->name_len = name_len;
/* mempcpy() returns the address just past the copied name, NUL included. */
152 value = mempcpy(entry->name, name, name_len + 1);
/* Read the value directly into the output buffer, offering all space left. */
154 value_len = lgetxattr(path, name, value, entries_end - value);
/* ERANGE is not logged; it is the documented buffer-too-small result. */
156 if (errno != ERANGE) {
157 ERROR_WITH_ERRNO("\"%s\": unable to read extended attribute \"%s\"",
162 if (value_len > WIM_XATTR_SIZE_MAX) {
163 WARNING("\"%s\": value of extended attribute \"%s\" is too large to store",
/* The value length is stored as a little-endian 16-bit field. */
167 entry->value_len = cpu_to_le16(value_len);
/* The next entry begins immediately after this value. */
168 entry = value + value_len;
/* Step over this name and its NUL terminator. */
170 name += name_len + 1;
171 } while (name < names_end);
/* Number of bytes between the start of @entries and the entry cursor. */
173 return (void *)entry - entries;
/*
 * create_xattr_item(): serialize the xattrs named in @names for the file at
 * @path with gather_xattr_entries() and attach the result to @inode with
 * inode_set_xattrs().
 * NOTE(review): several lines of this function (declarations, loop structure,
 * jumps and the final return) are not visible in this excerpt; the comments
 * below describe only the visible statements.
 */
177 create_xattr_item(const char *path, struct wim_inode *inode,
178 const char *names, size_t names_size)
/* Start with the array @_entries; a buffer from MALLOC() may replace it. */
181 char *entries = _entries;
182 size_t entries_avail = ARRAY_LEN(_entries);
183 ssize_t entries_size;
187 /* Serialize the xattrs into @entries */
188 entries_size = gather_xattr_entries(path, names, names_size,
189 entries, entries_avail);
/* A negative size means gather_xattr_entries() failed. */
190 if (entries_size < 0) {
191 ret = WIMLIB_ERR_STAT;
194 /* Not enough space in @entries. Reallocate it. */
195 if (entries != _entries)
197 ret = WIMLIB_ERR_NOMEM;
199 entries = MALLOC(entries_avail);
205 /* Copy @entries into an xattr item associated with @inode */
/* The size must survive the conversion to u32 unchanged. */
206 if ((u32)entries_size != entries_size) {
207 ERROR("\"%s\": too much xattr data!", path);
208 ret = WIMLIB_ERR_STAT;
/* WIMLIB_ERR_NOMEM is preset before the attempt to attach the item. */
211 ret = WIMLIB_ERR_NOMEM;
212 if (!inode_set_xattrs(inode, entries, entries_size))
/* @entries differs from @_entries only after it was replaced by MALLOC(). */
217 if (entries != _entries)
/*
 * scan_linux_xattrs(): list the xattr names of the file at @path with
 * llistxattr() and, when the list is nonempty, hand it to create_xattr_item().
 * NOTE(review): several lines of this function are not visible in this
 * excerpt; the added comments describe only the visible statements.
 */
223 * If the file at @path has Linux-style extended attributes, read them into
224 * memory and add them to @inode as a tagged item.
226 static noinline_for_stack int
227 scan_linux_xattrs(const char *path, struct wim_inode *inode)
/* First attempt uses the array @_names; @names_size starts as its length. */
230 char *names = _names;
231 ssize_t names_size = ARRAY_LEN(_names);
235 /* Gather the names of the xattrs of the file at @path */
236 names_size = llistxattr(path, names, names_size);
237 if (names_size == 0) /* No xattrs? */
239 if (names_size < 0) {
240 /* xattrs unsupported or disabled? */
241 if (errno == ENOTSUP || errno == ENOSYS)
243 if (errno == ERANGE) {
245 * Not enough space in @names. Ask for how much space
246 * we need, then try again.
/* A NULL buffer with size 0 asks llistxattr() for the required size only. */
248 names_size = llistxattr(path, NULL, 0);
251 if (names_size > 0) {
/* Allocate exactly the size reported by the query above. */
254 names = MALLOC(names_size);
256 ret = WIMLIB_ERR_NOMEM;
262 /* Some other error occurred. */
263 ERROR_WITH_ERRNO("\"%s\": unable to list extended attributes",
265 ret = WIMLIB_ERR_STAT;
270 * We have a nonempty list of xattr names. Gather the xattr values and
271 * add them as a tagged item.
273 ret = create_xattr_item(path, inode, names, names_size);
279 #endif /* HAVE_LINUX_XATTR_SUPPORT */
/*
 * unix_scan_regular_file(): set the attributes of @inode for a regular file
 * and register its contents as an unhashed blob that refers to the on-disk
 * file at @path.
 *
 * @blocks and @size are compared below in units of 512 bytes to detect sparse
 * files.  The visible error exit frees the blob descriptor and returns
 * WIMLIB_ERR_NOMEM.
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (for example the conditions guarding blob creation and the success
 * return); the comments describe only the visible statements.
 */
282 unix_scan_regular_file(const char *path, u64 blocks, u64 size,
283 struct wim_inode *inode,
284 struct list_head *unhashed_blobs)
286 struct blob_descriptor *blob = NULL;
287 struct wim_inode_stream *strm;
290 * Set FILE_ATTRIBUTE_SPARSE_FILE if the file uses less disk space than
291 * expected given its size.
293 if (blocks < DIV_ROUND_UP(size, 512))
294 inode->i_attributes = FILE_ATTRIBUTE_SPARSE_FILE;
296 inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
299 blob = new_blob_descriptor();
/* The blob keeps its own copy of the path, made with STRDUP(). */
302 blob->file_on_disk = STRDUP(path);
303 if (unlikely(!blob->file_on_disk))
305 blob->blob_location = BLOB_IN_FILE_ON_DISK;
307 blob->file_inode = inode;
/* The blob becomes the unnamed data stream of the inode. */
310 strm = inode_add_stream(inode, STREAM_TYPE_DATA, NO_STREAM_NAME, blob);
314 prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs);
/* Error exit: release the blob descriptor and report out-of-memory. */
318 free_blob_descriptor(blob);
319 return WIMLIB_ERR_NOMEM;
/*
 * Forward declaration: unix_scan_directory() calls this function for every
 * directory entry, and this function in turn calls unix_scan_directory() for
 * directories, so one of the two has to be declared before its definition.
 */
323 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
324 int dirfd, const char *relpath,
325 struct scan_params *params);
/*
 * unix_scan_directory(): scan the children of a directory and attach each
 * resulting subtree to @dir_dentry.
 *
 * The directory is opened relative to @parent_dirfd as @dir_relpath (or by
 * params->cur_path, depending on how my_openat() is defined).  Each entry
 * returned by readdir() is scanned with unix_build_dentry_tree_recursive().
 * Visible error codes are WIMLIB_ERR_OPENDIR, WIMLIB_ERR_READ and
 * WIMLIB_ERR_NOMEM.
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (loop structure, cleanup and returns); the comments describe only
 * the visible statements.
 */
328 unix_scan_directory(struct wim_dentry *dir_dentry,
329 int parent_dirfd, const char *dir_relpath,
330 struct scan_params *params)
337 dirfd = my_openat(params->cur_path, parent_dirfd, dir_relpath, O_RDONLY);
339 ERROR_WITH_ERRNO("\"%s\": Can't open directory",
341 return WIMLIB_ERR_OPENDIR;
344 dir_dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
/* Passed by address because one my_fdopendir() variant rewrites the fd. */
345 dir = my_fdopendir(&dirfd);
347 ERROR_WITH_ERRNO("\"%s\": Can't open directory",
350 return WIMLIB_ERR_OPENDIR;
355 struct dirent *entry;
356 struct wim_dentry *child;
358 size_t orig_path_len;
361 entry = readdir(dir);
364 ret = WIMLIB_ERR_READ;
365 ERROR_WITH_ERRNO("\"%s\": Error reading directory",
371 name_len = strlen(entry->d_name);
373 if (should_ignore_filename(entry->d_name, name_len))
/* WIMLIB_ERR_NOMEM is preset before the path buffer is extended. */
376 ret = WIMLIB_ERR_NOMEM;
377 if (!pathbuf_append_name(params, entry->d_name, name_len,
/* Recurse with this directory fd and the bare entry name as relative path. */
380 ret = unix_build_dentry_tree_recursive(&child, dirfd,
381 entry->d_name, params);
/* Restore the path buffer to the length it had before the append. */
382 pathbuf_truncate(params, orig_path_len);
385 attach_scanned_tree(dir_dentry, child, params->blob_table);
/*
 * unix_relativize_link_target(): see the description below.
 * NOTE(review): most statements of the function body are not visible in this
 * excerpt.  The visible ones call stat() on @target and compare the result
 * with @ino and @dev -- presumably after temporarily terminating @target at
 * the current prefix, since @target is a non-const char pointer; confirm
 * against the full source.
 */
392 * Given an absolute symbolic link target (UNIX-style, beginning with '/'),
393 * determine whether it points into the directory identified by @ino and @dev.
394 * If yes, return the suffix of @target which is relative to this directory, but
395 * retaining leading slashes. If no, return @target.
397 * Here are some examples, assuming that the @ino/@dev directory is "/home/e":
399 * Original target New target
400 * --------------- ----------
402 * /home/e/test/ /test/
403 * //home//e//test// //test//
404 * /home/e (empty string)
406 * /usr/lib /usr/lib (external link)
408 * Because of the possibility of other links into the @ino/@dev directory and/or
409 * multiple path separators, we can't simply do a string comparison; instead we
410 * need to stat() each ancestor directory.
412 * If the link points directly to the @ino/@dev directory with no trailing
413 * slashes, then the new target will be an empty string. This is not a valid
414 * UNIX symlink target, but we store this in the archive anyway since the target
415 * is intended to be de-relativized when the link is extracted.
418 unix_relativize_link_target(char *target, u64 ino, u64 dev)
427 /* Skip slashes (guaranteed to be at least one here) */
436 /* Skip non-slashes (guaranteed to be at least one here) */
439 } while (*p && *p != '/');
441 /* Get the inode and device numbers for this prefix. */
444 ret = stat(target, &stbuf);
448 /* stat() failed. Assume the link points outside the
449 * directory tree being captured. */
/* Both inode and device number must match to identify the directory. */
453 if (stbuf.st_ino == ino && stbuf.st_dev == dev) {
454 /* Link points inside directory tree being captured.
455 * Return abbreviated path. */
460 /* Link does not point inside directory tree being captured. */
/*
 * unix_scan_symlink(): read the target of the symbolic link @relpath
 * (relative to @dirfd, or params->cur_path depending on how my_readlinkat()
 * is defined) and store it on @inode with wim_inode_set_symlink().
 *
 * When WIMLIB_ADD_FLAG_RPFIX is set and the target is absolute, the target is
 * first passed through unix_relativize_link_target() and the outcome is
 * reported with do_scan_progress().  Visible error code: WIMLIB_ERR_READLINK.
 * NOTE(review): several lines of this function are not visible in this
 * excerpt; the comments describe only the visible statements.
 */
464 static noinline_for_stack int
465 unix_scan_symlink(int dirfd, const char *relpath,
466 struct wim_inode *inode, struct scan_params *params)
468 char orig_target[REPARSE_POINT_MAX_SIZE];
469 char *target = orig_target;
472 /* Read the UNIX symbolic link target. */
473 ret = my_readlinkat(params->cur_path, dirfd, relpath, target,
474 sizeof(orig_target));
475 if (unlikely(ret < 0)) {
476 ERROR_WITH_ERRNO("\"%s\": Can't read target of symbolic link",
478 return WIMLIB_ERR_READLINK;
/*
 * A result that fills the whole buffer is rejected -- presumably because one
 * byte must remain for a NUL terminator; confirm against the full source.
 */
480 if (unlikely(ret >= sizeof(orig_target))) {
481 ERROR("\"%s\": target of symbolic link is too long",
483 return WIMLIB_ERR_READLINK;
487 /* If the link is absolute and reparse point fixups are enabled, then
488 * change it to be "absolute" relative to the tree being captured. */
489 if (target[0] == '/' && (params->add_flags & WIMLIB_ADD_FLAG_RPFIX)) {
490 int status = WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK;
/* The progress record carries the target as it was before any fixup. */
492 params->progress.scan.symlink_target = target;
494 target = unix_relativize_link_target(target,
495 params->capture_root_ino,
496 params->capture_root_dev);
/* A returned pointer other than the buffer start means a prefix was cut. */
497 if (target != orig_target) {
498 /* Link target was fixed. */
499 inode->i_rp_flags &= ~WIM_RP_FLAG_NOT_FIXED;
500 status = WIMLIB_SCAN_DENTRY_FIXED_SYMLINK;
502 ret = do_scan_progress(params, status, NULL);
507 /* Translate the UNIX symlink target into a Windows reparse point. */
508 ret = wim_inode_set_symlink(inode, target, params->blob_table);
510 if (ret == WIMLIB_ERR_INVALID_UTF8_STRING) {
511 ERROR("\"%s\": target of symbolic link is not valid "
512 "UTF-8. This is not supported.",
518 /* On Windows, a reparse point can be set on both directory and
519 * non-directory files. Usually, a link that is intended to point to a
520 * (non-)directory is stored as a reparse point on a (non-)directory
521 * file. Replicate this behavior by examining the target file. */
/* Flags are 0 here, so AT_SYMLINK_NOFOLLOW is not requested for this stat. */
523 if (my_fstatat(params->cur_path, dirfd, relpath, &stbuf, 0) == 0 &&
524 S_ISDIR(stbuf.st_mode))
525 inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
/*
 * unix_build_dentry_tree_recursive(): scan one file, identified by @relpath
 * relative to @dirfd (and by params->cur_path), and create its dentry.
 *
 * Visible steps, in order: exclusion check, stat, file type check, dentry and
 * inode creation, timestamps, optional UNIX data and xattrs, capture root
 * bookkeeping, dispatch by file type, progress reporting.
 * NOTE(review): many lines of this function are not visible in this excerpt
 * (declarations, jumps, labels, the store to @tree_ret and the return); the
 * comments describe only the visible statements.
 */
530 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
531 int dirfd, const char *relpath,
532 struct scan_params *params)
534 struct wim_dentry *tree = NULL;
535 struct wim_inode *inode = NULL;
540 ret = try_exclude(params);
541 if (unlikely(ret < 0)) /* Excluded? */
543 if (unlikely(ret > 0)) /* Error? */
/*
 * The stat flags depend on whether DEREFERENCE or ROOT is set; the branch
 * taken for those flags is not visible here, the other branch requests
 * AT_SYMLINK_NOFOLLOW.
 */
546 if (params->add_flags & (WIMLIB_ADD_FLAG_DEREFERENCE |
547 WIMLIB_ADD_FLAG_ROOT))
550 stat_flags = AT_SYMLINK_NOFOLLOW;
552 ret = my_fstatat(params->cur_path, dirfd, relpath, &stbuf, stat_flags);
555 ERROR_WITH_ERRNO("\"%s\": Can't read metadata",
557 ret = WIMLIB_ERR_STAT;
/*
 * Without WIMLIB_ADD_FLAG_UNIX_DATA only regular files, directories and
 * symbolic links are accepted.
 */
561 if (!(params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA)) {
562 if (unlikely(!S_ISREG(stbuf.st_mode) &&
563 !S_ISDIR(stbuf.st_mode) &&
564 !S_ISLNK(stbuf.st_mode)))
566 if (params->add_flags &
567 WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE)
569 ERROR("\"%s\": File type is unsupported",
571 ret = WIMLIB_ERR_UNSUPPORTED_FILE;
574 ret = do_scan_progress(params,
575 WIMLIB_SCAN_DENTRY_UNSUPPORTED,
/* The inode table is given the inode and device numbers of the file. */
581 ret = inode_table_new_dentry(params->inode_table, relpath,
582 stbuf.st_ino, stbuf.st_dev, false, &tree);
584 if (ret == WIMLIB_ERR_INVALID_UTF8_STRING) {
585 ERROR("\"%s\": filename is not valid UTF-8. "
586 "This is not supported.", params->cur_path);
591 inode = tree->d_inode;
593 /* Already seen this inode? */
594 if (inode->i_nlink > 1)
/*
 * Creation time and last write time are both taken from the modification
 * time; last access time is taken from the access time.
 */
597 #ifdef HAVE_STAT_NANOSECOND_PRECISION
598 inode->i_creation_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
599 inode->i_last_write_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
600 inode->i_last_access_time = timespec_to_wim_timestamp(&stbuf.st_atim);
602 inode->i_creation_time = time_t_to_wim_timestamp(stbuf.st_mtime);
603 inode->i_last_write_time = time_t_to_wim_timestamp(stbuf.st_mtime);
604 inode->i_last_access_time = time_t_to_wim_timestamp(stbuf.st_atime);
/* With UNIX_DATA: record owner, group, mode and device number, then xattrs. */
606 if (params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA) {
607 struct wimlib_unix_data unix_data;
609 unix_data.uid = stbuf.st_uid;
610 unix_data.gid = stbuf.st_gid;
611 unix_data.mode = stbuf.st_mode;
612 unix_data.rdev = stbuf.st_rdev;
613 if (!inode_set_unix_data(inode, &unix_data, UNIX_DATA_ALL)) {
614 ret = WIMLIB_ERR_NOMEM;
617 #ifdef HAVE_LINUX_XATTR_SUPPORT
618 ret = scan_linux_xattrs(params->cur_path, inode);
/*
 * Record the inode and device numbers of the capture root, then clear the
 * ROOT flag in params->add_flags.
 */
624 if (params->add_flags & WIMLIB_ADD_FLAG_ROOT) {
625 params->capture_root_ino = stbuf.st_ino;
626 params->capture_root_dev = stbuf.st_dev;
627 params->add_flags &= ~WIMLIB_ADD_FLAG_ROOT;
/* Dispatch on the file type reported by stat. */
630 if (S_ISREG(stbuf.st_mode)) {
631 ret = unix_scan_regular_file(params->cur_path, stbuf.st_blocks,
632 stbuf.st_size, inode,
633 params->unhashed_blobs);
634 } else if (S_ISDIR(stbuf.st_mode)) {
635 ret = unix_scan_directory(tree, dirfd, relpath, params);
636 } else if (S_ISLNK(stbuf.st_mode)) {
637 ret = unix_scan_symlink(dirfd, relpath, inode, params);
645 ret = do_scan_progress(params, WIMLIB_SCAN_DENTRY_OK, inode);
647 ret = do_scan_progress(params, WIMLIB_SCAN_DENTRY_EXCLUDED, NULL);
/* The partially built tree is freed before the error is reported. */
650 free_dentry_tree(tree, params->blob_table);
652 ret = report_scan_error(params, ret);
/*
 * Entry point of the UNIX capture code: initializes the path buffer in
 * @params from @root_disk_path with pathbuf_init(), then scans the tree with
 * unix_build_dentry_tree_recursive(), using AT_FDCWD as the directory
 * descriptor and @root_disk_path as the relative path.
 * NOTE(review): the handling of the pathbuf_init() result is not visible in
 * this excerpt.
 */
659 * unix_build_dentry_tree():
660 * Builds a tree of WIM dentries from an on-disk directory tree (UNIX
661 * version; no NTFS-specific data is captured).
663 * @root_ret: Place to return a pointer to the root of the dentry tree. Set
664 * to NULL if the file or directory was excluded from capture.
666 * @root_disk_path: The path to the root of the directory tree on disk.
668 * @params: See doc for `struct scan_params'.
670 * @return: 0 on success, nonzero on failure. It is a failure if any of
671 * the files cannot be `stat'ed, or if any of the needed
672 * directories cannot be opened or read. Failure to add the files
673 * to the WIM may still occur later when trying to actually read
674 * the on-disk files during a call to wimlib_write() or
675 * wimlib_overwrite().
678 unix_build_dentry_tree(struct wim_dentry **root_ret,
679 const char *root_disk_path, struct scan_params *params)
683 ret = pathbuf_init(params, root_disk_path);
687 return unix_build_dentry_tree_recursive(root_ret, AT_FDCWD,
688 root_disk_path, params);