2 * unix_capture.c: Capture a directory tree on UNIX.
6 * Copyright (C) 2012-2017 Eric Biggers
8 * This file is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 3 of the License, or (at your option) any
13 * This file is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this file; if not, see http://www.gnu.org/licenses/.
32 #include <sys/types.h>
33 #ifdef HAVE_SYS_XATTR_H
34 # include <sys/xattr.h>
38 #include "wimlib/blob_table.h"
39 #include "wimlib/dentry.h"
40 #include "wimlib/error.h"
41 #include "wimlib/reparse.h"
42 #include "wimlib/scan.h"
43 #include "wimlib/timestamp.h"
44 #include "wimlib/unix_data.h"
45 #include "wimlib/xattr.h"
/*
 * my_fdopendir(dirfd_p): get a DIR stream for the directory file descriptor
 * *dirfd_p.  This variant forwards directly to fdopendir().
 * NOTE(review): the preprocessor condition that selects between this macro
 * and the function below is on lines not shown here.
 */
48 # define my_fdopendir(dirfd_p) fdopendir(*(dirfd_p))
/*
 * Function variant of my_fdopendir().  Only part of its body is visible: it
 * opens "." to remember the current working directory, fchdir()s into
 * *dirfd_p, and stores dirfd(dir) back through @dirfd_p, so the caller's
 * descriptor may be replaced.
 * NOTE(review): how 'dir' is obtained and whether the old working directory
 * is restored is on lines not shown here -- confirm against the full source.
 */
51 my_fdopendir(int *dirfd_p)
56 old_pwd = open(".", O_RDONLY);
/* fchdir() returns 0 on success, so this branch is taken when it worked. */
58 if (!fchdir(*dirfd_p)) {
/* Hand the descriptor that belongs to the DIR stream back to the caller. */
62 *dirfd_p = dirfd(dir);
/*
 * my_openat(): open a file given both its full path and its location
 * relative to a directory descriptor.  This variant forwards to openat()
 * using @dirfd and @relpath; @full_path is unused.
 */
73 # define my_openat(full_path, dirfd, relpath, flags) \
74 openat((dirfd), (relpath), (flags))
/*
 * Alternative my_openat(): ignores @dirfd and @relpath and open()s
 * @full_path instead.
 * NOTE(review): presumably the fallback for systems without openat(); the
 * selecting #if/#else lines are not shown here.
 */
76 # define my_openat(full_path, dirfd, relpath, flags) \
77 open((full_path), (flags))
/*
 * my_readlinkat(): read a symbolic link target into @buf (at most @bufsize
 * bytes).  With HAVE_READLINKAT the lookup is done relative to @dirfd via
 * readlinkat() and @full_path is unused.
 */
80 #ifdef HAVE_READLINKAT
81 # define my_readlinkat(full_path, dirfd, relpath, buf, bufsize) \
82 readlinkat((dirfd), (relpath), (buf), (bufsize))
/*
 * Alternative my_readlinkat(): ignores @dirfd and @relpath and calls
 * readlink() on @full_path.
 */
84 # define my_readlinkat(full_path, dirfd, relpath, buf, bufsize) \
85 readlink((full_path), (buf), (bufsize))
/*
 * my_fstatat(): stat a file given both its full path and its location
 * relative to a directory descriptor.  This variant forwards to fstatat()
 * using @dirfd, @relpath and @flags; @full_path is unused.
 */
89 # define my_fstatat(full_path, dirfd, relpath, stbuf, flags) \
90 fstatat((dirfd), (relpath), (stbuf), (flags))
/*
 * Fallback my_fstatat(): ignores @dirfd and @relpath and stats @full_path,
 * calling lstat() when AT_SYMLINK_NOFOLLOW is set in @flags and stat()
 * otherwise.  Evaluates to the return value of whichever call was made.
 *
 * The whole conditional is parenthesized so that the macro behaves like a
 * single function call inside a larger expression such as
 * `my_fstatat(...) == 0 && ...`.  Without the outer parentheses the `== 0`
 * would bind only to the stat() arm of the conditional.
 */
# define my_fstatat(full_path, dirfd, relpath, stbuf, flags) \
	(((flags) & AT_SYMLINK_NOFOLLOW) ? \
		lstat((full_path), (stbuf)) : \
		stat((full_path), (stbuf)))
/*
 * Local definition of AT_FDCWD, the directory-descriptor value that this file
 * passes for the root of the scan.
 * NOTE(review): the guard around this define is on a line not shown here;
 * presumably it applies only when the system headers lack AT_FDCWD.
 */
99 # define AT_FDCWD -100
/* Provide AT_SYMLINK_NOFOLLOW when the system headers do not define it. */
102 #ifndef AT_SYMLINK_NOFOLLOW
103 # define AT_SYMLINK_NOFOLLOW 0x100
106 #ifdef HAVE_XATTR_SUPPORT
108 * Retrieves the values of the xattrs named by the null-terminated @names of the
109 * file at @path and serializes the xattr names and values into @entries. If
110 * successful, returns the number of bytes used in @entries. If unsuccessful,
111 * returns -1 and sets errno (ERANGE if @entries was too small).
/*
 * gather_xattr_entries(): for each NUL-terminated name in @names (a list of
 * @names_size bytes), serialize one struct wimlib_xattr_entry into @entries:
 * the little-endian name and value lengths, the name bytes, and then the
 * value, which lgetxattr() reads straight into the remaining buffer space.
 * The return value is the byte offset of 'entry' from the start of @entries.
 * NOTE(review): several lines of this function (error returns, the padding
 * loop body, advancing 'entry') are not shown here.
 */
114 gather_xattr_entries(const char *path, const char *names, size_t names_size,
115 void *entries, size_t entries_size)
117 const char * const names_end = names + names_size;
/* Arithmetic on a 'void *' relies on the GNU C extension (byte units). */
118 void * const entries_end = entries + entries_size;
119 const char *name = names;
120 struct wimlib_xattr_entry *entry = entries;
/*
 * Preconditions: @entries is 4-byte aligned, @entries_size is a multiple
 * of 4, and the name list is not empty.
 */
122 wimlib_assert((uintptr_t)entries % 4 == 0 &&
123 entries_size % 4 == 0 && names_size != 0);
/* Length of the current name, bounded by the bytes left in the list. */
125 size_t name_len = strnlen(name, names_end - name);
/*
 * Reject an empty name, a name with no NUL terminator before the end of the
 * list (strnlen() hit its bound), or a name too long for a 16-bit length.
 */
129 if (name_len == 0 || name_len >= names_end - name ||
130 (u16)name_len != name_len) {
131 ERROR("\"%s\": malformed extended attribute names list",
138 * Note: we take care to always call lgetxattr() with a nonzero
139 * size, since zero size means to return the value length only.
/*
 * '<=' rather than '<': strictly more room than the entry header plus the
 * name is required, so at least one byte remains for the value.
 */
141 if (entries_end - (void *)entry <= sizeof(*entry) + name_len) {
146 entry->name_len = cpu_to_le16(name_len);
/* mempcpy() returns the address just past the copied name; the value goes there. */
148 value = mempcpy(entry->name, name, name_len);
/* Read the value directly into the space remaining in @entries. */
150 value_len = lgetxattr(path, name, value, entries_end - value);
/* ERANGE (value does not fit) is not logged here; any other errno is. */
152 if (errno != ERANGE) {
153 ERROR_WITH_ERRNO("\"%s\": unable to read extended attribute \"%s\"",
/* The value length must be representable in the 32-bit value_len field. */
158 if ((u32)value_len != value_len) {
159 ERROR("\"%s\": value of extended attribute \"%s\" is too large",
164 entry->value_len = cpu_to_le32(value_len);
167 * Zero-pad the entry to the next 4-byte boundary.
168 * Note: because we've guaranteed that @entries_size is a
169 * multiple of 4, this cannot overflow the @entries buffer.
/* Loop until 'value' is 4-byte aligned (the loop body is not shown here). */
172 while ((uintptr_t)value & 3) {
/* Step over this name and its NUL terminator to reach the next name. */
178 name += name_len + 1;
179 } while (name < names_end);
/*
 * NOTE(review): assumes 'entry' was advanced past each serialized entry on
 * lines not shown here, making this the number of bytes used.
 */
181 return (void *)entry - entries;
/*
 * create_xattr_item(): serialize the xattrs of @path named in @names using
 * gather_xattr_entries(), then attach the result to @inode with
 * inode_set_linux_xattrs().  Serialization starts in a 1024-byte stack
 * buffer; a buffer from MALLOC() is used when that is not enough.
 * Visible error codes are WIMLIB_ERR_STAT and WIMLIB_ERR_NOMEM.
 * NOTE(review): the retry loop, buffer growth and success return value are
 * on lines not shown here.
 */
185 create_xattr_item(const char *path, struct wim_inode *inode,
186 const char *names, size_t names_size)
/* Stack buffer aligned to 4 bytes, as gather_xattr_entries() asserts. */
188 char _entries[1024] _aligned_attribute(4);
189 char *entries = _entries;
190 size_t entries_avail = ARRAY_LEN(_entries);
191 ssize_t entries_size;
195 /* Serialize the xattrs into @entries */
196 entries_size = gather_xattr_entries(path, names, names_size,
197 entries, entries_avail);
/*
 * A negative result means gather_xattr_entries() failed.
 * NOTE(review): the check that separates "buffer too small" from other
 * errors is on lines not shown here.
 */
198 if (entries_size < 0) {
199 ret = WIMLIB_ERR_STAT;
202 /* Not enough space in @entries. Reallocate it. */
/* True only when 'entries' is a heap buffer rather than the stack array. */
203 if (entries != _entries)
205 ret = WIMLIB_ERR_NOMEM;
207 entries = MALLOC(entries_avail);
213 /* Copy @entries into an xattr item associated with @inode */
/* The serialized size must be representable in 32 bits. */
214 if ((u32)entries_size != entries_size) {
215 ERROR("\"%s\": too much xattr data!", path);
216 ret = WIMLIB_ERR_STAT;
/* Error code is set in advance; a false return below is treated as failure. */
219 ret = WIMLIB_ERR_NOMEM;
220 if (!inode_set_linux_xattrs(inode, entries, entries_size))
/* NOTE(review): presumably releases the heap buffer; the action is not shown. */
225 if (entries != _entries)
231 * If the file at @path has Linux-style extended attributes, read them into
232 * memory and add them to @inode as a tagged item.
/*
 * scan_linux_xattrs(): list the xattr names of @path with llistxattr() and,
 * when the list is not empty, pass it to create_xattr_item() so the names
 * and values are stored on @inode.  Visible error codes are
 * WIMLIB_ERR_NOMEM and WIMLIB_ERR_STAT.
 * NOTE(review): the early exits and the cleanup of 'names' are on lines not
 * shown here.
 */
234 static noinline_for_stack int
235 scan_linux_xattrs(const char *path, struct wim_inode *inode)
/* Start with the local buffer _names (declared on a line not shown here). */
238 char *names = _names;
239 ssize_t names_size = ARRAY_LEN(_names);
243 /* Gather the names of the xattrs of the file at @path */
244 names_size = llistxattr(path, names, names_size);
245 if (names_size == 0) /* No xattrs? */
247 if (names_size < 0) {
248 /* xattrs unsupported or disabled? */
249 if (errno == ENOTSUP || errno == ENOSYS)
251 if (errno == ERANGE) {
253 * Not enough space in @names. Ask for how much space
254 * we need, then try again.
/* A NULL buffer with size 0 makes llistxattr() report the required size. */
256 names_size = llistxattr(path, NULL, 0);
259 if (names_size > 0) {
/* Heap buffer sized to the length just reported. */
262 names = MALLOC(names_size);
264 ret = WIMLIB_ERR_NOMEM;
270 /* Some other error occurred. */
271 ERROR_WITH_ERRNO("\"%s\": unable to list extended attributes",
273 ret = WIMLIB_ERR_STAT;
278 * We have a nonempty list of xattr names. Gather the xattr values and
279 * add them as a tagged item.
281 ret = create_xattr_item(path, inode, names, names_size);
287 #endif /* HAVE_XATTR_SUPPORT */
/*
 * unix_scan_regular_file(): set the attributes of @inode for a regular file
 * and add an unnamed data stream whose blob descriptor refers to the on-disk
 * file at @path.  @blocks is compared against @size expressed in 512-byte
 * units to detect sparse files.  The visible failure path returns
 * WIMLIB_ERR_NOMEM.
 * NOTE(review): the conditions guarding blob creation and the success return
 * are on lines not shown here.
 */
290 unix_scan_regular_file(const char *path, u64 blocks, u64 size,
291 struct wim_inode *inode,
292 struct list_head *unhashed_blobs)
294 struct blob_descriptor *blob = NULL;
295 struct wim_inode_stream *strm;
298 * Set FILE_ATTRIBUTE_SPARSE_FILE if the file uses less disk space than
299 * expected given its size.
/*
 * NOTE(review): the two assignments below are presumably alternatives; the
 * 'else' between them is on a line not shown here.
 */
301 if (blocks < DIV_ROUND_UP(size, 512))
302 inode->i_attributes = FILE_ATTRIBUTE_SPARSE_FILE;
304 inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
307 blob = new_blob_descriptor();
/* The blob descriptor keeps its own copy of the path. */
310 blob->file_on_disk = STRDUP(path);
311 if (unlikely(!blob->file_on_disk))
/* Mark the data as residing in a file on disk and link it back to @inode. */
313 blob->blob_location = BLOB_IN_FILE_ON_DISK;
315 blob->file_inode = inode;
/* Add the unnamed (NO_STREAM_NAME) data stream that references the blob. */
318 strm = inode_add_stream(inode, STREAM_TYPE_DATA, NO_STREAM_NAME, blob);
/* NOTE(review): presumably queues the blob on @unhashed_blobs -- confirm. */
322 prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs);
/* Failure path: release the blob descriptor and report out of memory. */
326 free_blob_descriptor(blob);
327 return WIMLIB_ERR_NOMEM;
/*
 * Forward declaration: unix_scan_directory() calls this function for every
 * directory entry, and the definition appears later in the file.
 */
331 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
332 int dirfd, const char *relpath,
333 struct scan_params *params);
/*
 * unix_scan_directory(): open the directory @dir_relpath (relative to
 * @parent_dirfd), mark the inode of @dir_dentry as a directory, and scan each
 * entry with unix_build_dentry_tree_recursive(), attaching every resulting
 * child to @dir_dentry.  Visible error codes are WIMLIB_ERR_OPENDIR,
 * WIMLIB_ERR_READ and WIMLIB_ERR_NOMEM.
 * NOTE(review): the loop construct, the end-of-directory check and the
 * cleanup of 'dir' are on lines not shown here.
 */
336 unix_scan_directory(struct wim_dentry *dir_dentry,
337 int parent_dirfd, const char *dir_relpath,
338 struct scan_params *params)
/* This descriptor is later passed as the base for the children's lookups. */
345 dirfd = my_openat(params->cur_path, parent_dirfd, dir_relpath, O_RDONLY);
347 ERROR_WITH_ERRNO("\"%s\": Can't open directory",
349 return WIMLIB_ERR_OPENDIR;
352 dir_dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
/* Passed by address because my_fdopendir() may replace the descriptor. */
353 dir = my_fdopendir(&dirfd);
355 ERROR_WITH_ERRNO("\"%s\": Can't open directory",
358 return WIMLIB_ERR_OPENDIR;
363 struct dirent *entry;
364 struct wim_dentry *child;
366 size_t orig_path_len;
369 entry = readdir(dir);
/*
 * NOTE(review): error path for readdir(); the test that tells an error apart
 * from end-of-directory is on lines not shown here.
 */
372 ret = WIMLIB_ERR_READ;
373 ERROR_WITH_ERRNO("\"%s\": Error reading directory",
379 name_len = strlen(entry->d_name);
/* NOTE(review): presumably skips entries such as "." and ".." -- confirm. */
381 if (should_ignore_filename(entry->d_name, name_len))
/* Error code is set in advance in case appending to the path buffer fails. */
384 ret = WIMLIB_ERR_NOMEM;
385 if (!pathbuf_append_name(params, entry->d_name, name_len,
/* Scan the child relative to this directory's descriptor. */
388 ret = unix_build_dentry_tree_recursive(&child, dirfd,
389 entry->d_name, params);
/* Cut the path buffer back to the saved length orig_path_len. */
390 pathbuf_truncate(params, orig_path_len);
393 attach_scanned_tree(dir_dentry, child, params->blob_table);
400 * Given an absolute symbolic link target (UNIX-style, beginning with '/'),
401 * determine whether it points into the directory identified by @ino and @dev.
402 * If yes, return the suffix of @target which is relative to this directory, but
403 * retaining leading slashes. If no, return @target.
405 * Here are some examples, assuming that the @ino/@dev directory is "/home/e":
407 * Original target New target
408 * --------------- ----------
410 * /home/e/test/ /test/
411 * //home//e//test// //test//
412 * /home/e (empty string)
414 * /usr/lib /usr/lib (external link)
416 * Because of the possibility of other links into the @ino/@dev directory and/or
417 * multiple path separators, we can't simply do a string comparison; instead we
418 * need to stat() each ancestor directory.
420 * If the link points directly to the @ino/@dev directory with no trailing
421 * slashes, then the new target will be an empty string. This is not a valid
422 * UNIX symlink target, but we store this in the archive anyway since the target
423 * is intended to be de-relativized when the link is extracted.
/*
 * unix_relativize_link_target(): walk @target one path component at a time
 * and stat() each prefix, looking for one whose inode and device numbers
 * equal @ino and @dev.
 * NOTE(review): the declarations, the temporary termination of the prefix
 * and the return statements are on lines not shown here.
 */
426 unix_relativize_link_target(char *target, u64 ino, u64 dev)
435 /* Skip slashes (guaranteed to be at least one here) */
444 /* Skip non-slashes (guaranteed to be at least one here) */
/* The component ends at the next '/' or at the end of the string. */
447 } while (*p && *p != '/');
449 /* Get the inode and device numbers for this prefix. */
/*
 * stat() follows symbolic links, so a prefix that reaches the directory
 * through another link still yields the same inode and device numbers.
 */
452 ret = stat(target, &stbuf);
456 /* stat() failed. Assume the link points outside the
457 * directory tree being captured. */
/* Both the inode number and the device number must match. */
461 if (stbuf.st_ino == ino && stbuf.st_dev == dev) {
462 /* Link points inside directory tree being captured.
463 * Return abbreviated path. */
468 /* Link does not point inside directory tree being captured. */
/*
 * unix_scan_symlink(): read the target of the symbolic link @relpath
 * (relative to @dirfd; full path in params->cur_path), optionally rewrite an
 * absolute target relative to the capture root, and store it on @inode via
 * wim_inode_set_symlink().  The directory attribute is also set when the
 * link resolves to a directory.  WIMLIB_ERR_READLINK is returned when the
 * target cannot be read or is too long.
 * NOTE(review): the termination of the target string and the remaining
 * returns are on lines not shown here.
 */
472 static noinline_for_stack int
473 unix_scan_symlink(int dirfd, const char *relpath,
474 struct wim_inode *inode, struct scan_params *params)
476 char orig_target[REPARSE_POINT_MAX_SIZE];
477 char *target = orig_target;
480 /* Read the UNIX symbolic link target. */
481 ret = my_readlinkat(params->cur_path, dirfd, relpath, target,
482 sizeof(orig_target));
483 if (unlikely(ret < 0)) {
484 ERROR_WITH_ERRNO("\"%s\": Can't read target of symbolic link",
486 return WIMLIB_ERR_READLINK;
/*
 * readlink()-style calls truncate silently and do not NUL-terminate, so a
 * result that fills the whole buffer is treated as too long.
 */
488 if (unlikely(ret >= sizeof(orig_target))) {
489 ERROR("\"%s\": target of symbolic link is too long",
491 return WIMLIB_ERR_READLINK;
495 /* If the link is absolute and reparse point fixups are enabled, then
496 * change it to be "absolute" relative to the tree being captured. */
497 if (target[0] == '/' && (params->add_flags & WIMLIB_ADD_FLAG_RPFIX)) {
/* Reported as "not fixed" unless the target is actually changed below. */
498 int status = WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK;
/* Record the original target in the scan progress information. */
500 params->progress.scan.symlink_target = target;
502 target = unix_relativize_link_target(target,
503 params->capture_root_ino,
504 params->capture_root_dev);
/* A pointer other than the start of the buffer means a prefix was dropped. */
505 if (target != orig_target) {
506 /* Link target was fixed. */
507 inode->i_rp_flags &= ~WIM_RP_FLAG_NOT_FIXED;
508 status = WIMLIB_SCAN_DENTRY_FIXED_SYMLINK;
510 ret = do_scan_progress(params, status, NULL);
515 /* Translate the UNIX symlink target into a Windows reparse point. */
516 ret = wim_inode_set_symlink(inode, target, params->blob_table);
518 if (ret == WIMLIB_ERR_INVALID_UTF8_STRING) {
519 ERROR("\"%s\": target of symbolic link is not valid "
520 "UTF-8. This is not supported.",
526 /* On Windows, a reparse point can be set on both directory and
527 * non-directory files. Usually, a link that is intended to point to a
528 * (non-)directory is stored as a reparse point on a (non-)directory
529 * file. Replicate this behavior by examining the target file. */
/*
 * Flags of 0 (no AT_SYMLINK_NOFOLLOW) stat what the link points to.  If the
 * stat fails, the attributes are left unchanged.
 */
531 if (my_fstatat(params->cur_path, dirfd, relpath, &stbuf, 0) == 0 &&
532 S_ISDIR(stbuf.st_mode))
533 inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
/*
 * unix_build_dentry_tree_recursive(): scan the file at @relpath (relative to
 * @dirfd; full path in params->cur_path).  The visible steps are: apply the
 * exclusion check, stat the file, create a dentry through the inode table,
 * fill in timestamps and optional UNIX data and xattrs, then dispatch on the
 * file type to the regular-file, directory or symlink scanner.
 * NOTE(review): the labels, gotos and the store through @tree_ret are on
 * lines not shown here.
 */
538 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
539 int dirfd, const char *relpath,
540 struct scan_params *params)
542 struct wim_dentry *tree = NULL;
543 struct wim_inode *inode = NULL;
/* try_exclude(): a negative result means excluded, a positive one an error. */
548 ret = try_exclude(params);
549 if (unlikely(ret < 0)) /* Excluded? */
551 if (unlikely(ret > 0)) /* Error? */
/*
 * With DEREFERENCE or ROOT set, a different stat_flags value is chosen on a
 * line not shown here; otherwise the link itself is examined.
 */
554 if (params->add_flags & (WIMLIB_ADD_FLAG_DEREFERENCE |
555 WIMLIB_ADD_FLAG_ROOT))
558 stat_flags = AT_SYMLINK_NOFOLLOW;
560 ret = my_fstatat(params->cur_path, dirfd, relpath, &stbuf, stat_flags);
563 ERROR_WITH_ERRNO("\"%s\": Can't read metadata",
565 ret = WIMLIB_ERR_STAT;
/* Without UNIX_DATA only regular files, directories and symlinks pass. */
569 if (!(params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA)) {
570 if (unlikely(!S_ISREG(stbuf.st_mode) &&
571 !S_ISDIR(stbuf.st_mode) &&
572 !S_ISLNK(stbuf.st_mode)))
/* With NO_UNSUPPORTED_EXCLUDE an unsupported file type is an error. */
574 if (params->add_flags &
575 WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE)
577 ERROR("\"%s\": File type is unsupported",
579 ret = WIMLIB_ERR_UNSUPPORTED_FILE;
/* Otherwise the file is reported through the progress callback. */
582 ret = do_scan_progress(params,
583 WIMLIB_SCAN_DENTRY_UNSUPPORTED,
/* The dentry is requested from the inode table by inode and device number. */
589 ret = inode_table_new_dentry(params->inode_table, relpath,
590 stbuf.st_ino, stbuf.st_dev, false, &tree);
592 if (ret == WIMLIB_ERR_INVALID_UTF8_STRING) {
593 ERROR("\"%s\": filename is not valid UTF-8. "
594 "This is not supported.", params->cur_path);
599 inode = tree->d_inode;
601 /* Already seen this inode? */
/* NOTE(review): presumably skips the rest of the scan for a known inode. */
602 if (inode->i_nlink > 1)
605 #ifdef HAVE_STAT_NANOSECOND_PRECISION
/* The modification time is used for both creation and last-write times. */
606 inode->i_creation_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
607 inode->i_last_write_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
608 inode->i_last_access_time = timespec_to_wim_timestamp(&stbuf.st_atim);
/* NOTE(review): presumably the #else branch, using whole-second time_t fields. */
610 inode->i_creation_time = time_t_to_wim_timestamp(stbuf.st_mtime);
611 inode->i_last_write_time = time_t_to_wim_timestamp(stbuf.st_mtime);
612 inode->i_last_access_time = time_t_to_wim_timestamp(stbuf.st_atime);
/* With UNIX_DATA, owner, group, mode and device number are stored too. */
614 if (params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA) {
615 struct wimlib_unix_data unix_data;
617 unix_data.uid = stbuf.st_uid;
618 unix_data.gid = stbuf.st_gid;
619 unix_data.mode = stbuf.st_mode;
620 unix_data.rdev = stbuf.st_rdev;
621 if (!inode_set_unix_data(inode, &unix_data, UNIX_DATA_ALL)) {
622 ret = WIMLIB_ERR_NOMEM;
625 #ifdef HAVE_XATTR_SUPPORT
626 ret = scan_linux_xattrs(params->cur_path, inode);
/*
 * For the capture root, remember its inode and device numbers (used for
 * symlink target fixups) and clear the ROOT flag in params->add_flags.
 */
632 if (params->add_flags & WIMLIB_ADD_FLAG_ROOT) {
633 params->capture_root_ino = stbuf.st_ino;
634 params->capture_root_dev = stbuf.st_dev;
635 params->add_flags &= ~WIMLIB_ADD_FLAG_ROOT;
/* Dispatch on the file type reported by stat. */
638 if (S_ISREG(stbuf.st_mode)) {
639 ret = unix_scan_regular_file(params->cur_path, stbuf.st_blocks,
640 stbuf.st_size, inode,
641 params->unhashed_blobs);
642 } else if (S_ISDIR(stbuf.st_mode)) {
643 ret = unix_scan_directory(tree, dirfd, relpath, params);
644 } else if (S_ISLNK(stbuf.st_mode)) {
645 ret = unix_scan_symlink(dirfd, relpath, inode, params);
/* Progress reports for a scanned file and for an excluded one. */
653 ret = do_scan_progress(params, WIMLIB_SCAN_DENTRY_OK, inode);
655 ret = do_scan_progress(params, WIMLIB_SCAN_DENTRY_EXCLUDED, NULL);
/* NOTE(review): presumably the error path -- discard the partial tree. */
658 free_dentry_tree(tree, params->blob_table);
660 ret = report_scan_error(params, ret);
667 * unix_build_dentry_tree():
668 * Builds a tree of WIM dentries from an on-disk directory tree (UNIX
669 * version; no NTFS-specific data is captured).
671 * @root_ret: Place to return a pointer to the root of the dentry tree. Set
672 * to NULL if the file or directory was excluded from capture.
674 * @root_disk_path: The path to the root of the directory tree on disk.
676 * @params: See doc for `struct scan_params'.
678 * @return: 0 on success, nonzero on failure. It is a failure if any of
679 * the files cannot be `stat'ed, or if any of the needed
680 * directories cannot be opened or read. Failure to add the files
681 * to the WIM may still occur later when trying to actually read
682 * the on-disk files during a call to wimlib_write() or
683 * wimlib_overwrite().
/*
 * Entry point: initialize the path buffer in @params from @root_disk_path,
 * then scan the root with unix_build_dentry_tree_recursive().
 * NOTE(review): the check of the pathbuf_init() result is on lines not shown
 * here.
 */
686 unix_build_dentry_tree(struct wim_dentry **root_ret,
687 const char *root_disk_path, struct scan_params *params)
691 ret = pathbuf_init(params, root_disk_path);
/* AT_FDCWD: the root path is resolved against the current working directory. */
695 return unix_build_dentry_tree_recursive(root_ret, AT_FDCWD,
696 root_disk_path, params);
699 #endif /* !__WIN32__ */